[AArch64] Implement vbsl_f64 arm_neon.h intrinsic.
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob66968e8d198f11ea9667f689924618e1b389c2c1
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef __builtin_aarch64_simd_di int64x1_t
42 __attribute__ ((__vector_size__ (8)));
43 typedef int32_t int32x1_t;
44 typedef int16_t int16x1_t;
45 typedef int8_t int8x1_t;
46 typedef __builtin_aarch64_simd_df float64x1_t
47 __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_aarch64_simd_sf float32x2_t
49 __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_aarch64_simd_poly8 poly8x8_t
51 __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_aarch64_simd_poly16 poly16x4_t
53 __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_aarch64_simd_uqi uint8x8_t
55 __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_aarch64_simd_uhi uint16x4_t
57 __attribute__ ((__vector_size__ (8)));
58 typedef __builtin_aarch64_simd_usi uint32x2_t
59 __attribute__ ((__vector_size__ (8)));
60 typedef __builtin_aarch64_simd_udi uint64x1_t
61 __attribute__ ((__vector_size__ (8)));
62 typedef uint32_t uint32x1_t;
63 typedef uint16_t uint16x1_t;
64 typedef uint8_t uint8x1_t;
65 typedef __builtin_aarch64_simd_qi int8x16_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_hi int16x8_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_si int32x4_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_di int64x2_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_sf float32x4_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_df float64x2_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_poly8 poly8x16_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_poly16 poly16x8_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_poly64 poly64x2_t
82 __attribute__ ((__vector_size__ (16)));
83 typedef __builtin_aarch64_simd_uqi uint8x16_t
84 __attribute__ ((__vector_size__ (16)));
85 typedef __builtin_aarch64_simd_uhi uint16x8_t
86 __attribute__ ((__vector_size__ (16)));
87 typedef __builtin_aarch64_simd_usi uint32x4_t
88 __attribute__ ((__vector_size__ (16)));
89 typedef __builtin_aarch64_simd_udi uint64x2_t
90 __attribute__ ((__vector_size__ (16)));
92 typedef float float32_t;
93 typedef double float64_t;
94 typedef __builtin_aarch64_simd_poly8 poly8_t;
95 typedef __builtin_aarch64_simd_poly16 poly16_t;
96 typedef __builtin_aarch64_simd_poly64 poly64_t;
97 typedef __builtin_aarch64_simd_poly128 poly128_t;
99 typedef struct int8x8x2_t
101 int8x8_t val[2];
102 } int8x8x2_t;
104 typedef struct int8x16x2_t
106 int8x16_t val[2];
107 } int8x16x2_t;
109 typedef struct int16x4x2_t
111 int16x4_t val[2];
112 } int16x4x2_t;
114 typedef struct int16x8x2_t
116 int16x8_t val[2];
117 } int16x8x2_t;
119 typedef struct int32x2x2_t
121 int32x2_t val[2];
122 } int32x2x2_t;
124 typedef struct int32x4x2_t
126 int32x4_t val[2];
127 } int32x4x2_t;
129 typedef struct int64x1x2_t
131 int64x1_t val[2];
132 } int64x1x2_t;
134 typedef struct int64x2x2_t
136 int64x2_t val[2];
137 } int64x2x2_t;
139 typedef struct uint8x8x2_t
141 uint8x8_t val[2];
142 } uint8x8x2_t;
144 typedef struct uint8x16x2_t
146 uint8x16_t val[2];
147 } uint8x16x2_t;
149 typedef struct uint16x4x2_t
151 uint16x4_t val[2];
152 } uint16x4x2_t;
154 typedef struct uint16x8x2_t
156 uint16x8_t val[2];
157 } uint16x8x2_t;
159 typedef struct uint32x2x2_t
161 uint32x2_t val[2];
162 } uint32x2x2_t;
164 typedef struct uint32x4x2_t
166 uint32x4_t val[2];
167 } uint32x4x2_t;
169 typedef struct uint64x1x2_t
171 uint64x1_t val[2];
172 } uint64x1x2_t;
174 typedef struct uint64x2x2_t
176 uint64x2_t val[2];
177 } uint64x2x2_t;
179 typedef struct float32x2x2_t
181 float32x2_t val[2];
182 } float32x2x2_t;
184 typedef struct float32x4x2_t
186 float32x4_t val[2];
187 } float32x4x2_t;
189 typedef struct float64x2x2_t
191 float64x2_t val[2];
192 } float64x2x2_t;
194 typedef struct float64x1x2_t
196 float64x1_t val[2];
197 } float64x1x2_t;
199 typedef struct poly8x8x2_t
201 poly8x8_t val[2];
202 } poly8x8x2_t;
204 typedef struct poly8x16x2_t
206 poly8x16_t val[2];
207 } poly8x16x2_t;
209 typedef struct poly16x4x2_t
211 poly16x4_t val[2];
212 } poly16x4x2_t;
214 typedef struct poly16x8x2_t
216 poly16x8_t val[2];
217 } poly16x8x2_t;
219 typedef struct int8x8x3_t
221 int8x8_t val[3];
222 } int8x8x3_t;
224 typedef struct int8x16x3_t
226 int8x16_t val[3];
227 } int8x16x3_t;
229 typedef struct int16x4x3_t
231 int16x4_t val[3];
232 } int16x4x3_t;
234 typedef struct int16x8x3_t
236 int16x8_t val[3];
237 } int16x8x3_t;
239 typedef struct int32x2x3_t
241 int32x2_t val[3];
242 } int32x2x3_t;
244 typedef struct int32x4x3_t
246 int32x4_t val[3];
247 } int32x4x3_t;
249 typedef struct int64x1x3_t
251 int64x1_t val[3];
252 } int64x1x3_t;
254 typedef struct int64x2x3_t
256 int64x2_t val[3];
257 } int64x2x3_t;
259 typedef struct uint8x8x3_t
261 uint8x8_t val[3];
262 } uint8x8x3_t;
264 typedef struct uint8x16x3_t
266 uint8x16_t val[3];
267 } uint8x16x3_t;
269 typedef struct uint16x4x3_t
271 uint16x4_t val[3];
272 } uint16x4x3_t;
274 typedef struct uint16x8x3_t
276 uint16x8_t val[3];
277 } uint16x8x3_t;
279 typedef struct uint32x2x3_t
281 uint32x2_t val[3];
282 } uint32x2x3_t;
284 typedef struct uint32x4x3_t
286 uint32x4_t val[3];
287 } uint32x4x3_t;
289 typedef struct uint64x1x3_t
291 uint64x1_t val[3];
292 } uint64x1x3_t;
294 typedef struct uint64x2x3_t
296 uint64x2_t val[3];
297 } uint64x2x3_t;
299 typedef struct float32x2x3_t
301 float32x2_t val[3];
302 } float32x2x3_t;
304 typedef struct float32x4x3_t
306 float32x4_t val[3];
307 } float32x4x3_t;
309 typedef struct float64x2x3_t
311 float64x2_t val[3];
312 } float64x2x3_t;
314 typedef struct float64x1x3_t
316 float64x1_t val[3];
317 } float64x1x3_t;
319 typedef struct poly8x8x3_t
321 poly8x8_t val[3];
322 } poly8x8x3_t;
324 typedef struct poly8x16x3_t
326 poly8x16_t val[3];
327 } poly8x16x3_t;
329 typedef struct poly16x4x3_t
331 poly16x4_t val[3];
332 } poly16x4x3_t;
334 typedef struct poly16x8x3_t
336 poly16x8_t val[3];
337 } poly16x8x3_t;
339 typedef struct int8x8x4_t
341 int8x8_t val[4];
342 } int8x8x4_t;
344 typedef struct int8x16x4_t
346 int8x16_t val[4];
347 } int8x16x4_t;
349 typedef struct int16x4x4_t
351 int16x4_t val[4];
352 } int16x4x4_t;
354 typedef struct int16x8x4_t
356 int16x8_t val[4];
357 } int16x8x4_t;
359 typedef struct int32x2x4_t
361 int32x2_t val[4];
362 } int32x2x4_t;
364 typedef struct int32x4x4_t
366 int32x4_t val[4];
367 } int32x4x4_t;
369 typedef struct int64x1x4_t
371 int64x1_t val[4];
372 } int64x1x4_t;
374 typedef struct int64x2x4_t
376 int64x2_t val[4];
377 } int64x2x4_t;
379 typedef struct uint8x8x4_t
381 uint8x8_t val[4];
382 } uint8x8x4_t;
384 typedef struct uint8x16x4_t
386 uint8x16_t val[4];
387 } uint8x16x4_t;
389 typedef struct uint16x4x4_t
391 uint16x4_t val[4];
392 } uint16x4x4_t;
394 typedef struct uint16x8x4_t
396 uint16x8_t val[4];
397 } uint16x8x4_t;
399 typedef struct uint32x2x4_t
401 uint32x2_t val[4];
402 } uint32x2x4_t;
404 typedef struct uint32x4x4_t
406 uint32x4_t val[4];
407 } uint32x4x4_t;
409 typedef struct uint64x1x4_t
411 uint64x1_t val[4];
412 } uint64x1x4_t;
414 typedef struct uint64x2x4_t
416 uint64x2_t val[4];
417 } uint64x2x4_t;
419 typedef struct float32x2x4_t
421 float32x2_t val[4];
422 } float32x2x4_t;
424 typedef struct float32x4x4_t
426 float32x4_t val[4];
427 } float32x4x4_t;
429 typedef struct float64x2x4_t
431 float64x2_t val[4];
432 } float64x2x4_t;
434 typedef struct float64x1x4_t
436 float64x1_t val[4];
437 } float64x1x4_t;
439 typedef struct poly8x8x4_t
441 poly8x8_t val[4];
442 } poly8x8x4_t;
444 typedef struct poly8x16x4_t
446 poly8x16_t val[4];
447 } poly8x16x4_t;
449 typedef struct poly16x4x4_t
451 poly16x4_t val[4];
452 } poly16x4x4_t;
454 typedef struct poly16x8x4_t
456 poly16x8_t val[4];
457 } poly16x8x4_t;
/* vget_lane internal macros.  The generic helper routes through a
   big-endian-aware, bounds-checked builtin; the __cast_* arguments allow
   poly/unsigned types to be viewed through the signed builtin signature.
   64x1 lanes have no lane-extract builtin, so they validate the lane
   index explicitly and read element 0 (GNU statement expression).  */
#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret \
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
#define __aarch64_vget_lane_f64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros: broadcast one lane of a vector to
   every lane of a new vector.  __q1 selects the result width (empty = D,
   "q" = Q); __q2 selects the source width.  Implemented as
   vdup[q]_n_<type> of the extracted lane.  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

/* 64-bit result, 64-bit source.  */
#define __aarch64_vdup_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , , __a, __b)

/* __aarch64_vdup_laneq internal macros: 64-bit result, 128-bit source.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , q, __a, __b)

/* __aarch64_vdupq_lane internal macros: 128-bit result, 64-bit source.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, , __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, , __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, , __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, , __a, __b)

/* __aarch64_vdupq_laneq internal macros: 128-bit result and source.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, q, __a, __b)
637 /* vadd */
638 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
639 vadd_s8 (int8x8_t __a, int8x8_t __b)
641 return __a + __b;
644 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
645 vadd_s16 (int16x4_t __a, int16x4_t __b)
647 return __a + __b;
650 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
651 vadd_s32 (int32x2_t __a, int32x2_t __b)
653 return __a + __b;
656 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
657 vadd_f32 (float32x2_t __a, float32x2_t __b)
659 return __a + __b;
662 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
663 vadd_f64 (float64x1_t __a, float64x1_t __b)
665 return __a + __b;
668 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
669 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
671 return __a + __b;
674 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
675 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
677 return __a + __b;
680 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
681 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
683 return __a + __b;
686 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
687 vadd_s64 (int64x1_t __a, int64x1_t __b)
689 return __a + __b;
692 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
693 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
695 return __a + __b;
698 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
699 vaddq_s8 (int8x16_t __a, int8x16_t __b)
701 return __a + __b;
704 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
705 vaddq_s16 (int16x8_t __a, int16x8_t __b)
707 return __a + __b;
710 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
711 vaddq_s32 (int32x4_t __a, int32x4_t __b)
713 return __a + __b;
716 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
717 vaddq_s64 (int64x2_t __a, int64x2_t __b)
719 return __a + __b;
722 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
723 vaddq_f32 (float32x4_t __a, float32x4_t __b)
725 return __a + __b;
728 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
729 vaddq_f64 (float64x2_t __a, float64x2_t __b)
731 return __a + __b;
734 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
735 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
737 return __a + __b;
740 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
741 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
743 return __a + __b;
746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
747 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
749 return __a + __b;
752 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
753 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
755 return __a + __b;
758 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
759 vaddl_s8 (int8x8_t __a, int8x8_t __b)
761 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
764 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
765 vaddl_s16 (int16x4_t __a, int16x4_t __b)
767 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
770 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
771 vaddl_s32 (int32x2_t __a, int32x2_t __b)
773 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
776 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
777 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
779 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
780 (int8x8_t) __b);
783 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
784 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
786 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
787 (int16x4_t) __b);
790 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
791 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
793 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
794 (int32x2_t) __b);
797 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
798 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
800 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
803 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
804 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
806 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
809 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
810 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
812 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
815 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
816 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
818 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
819 (int8x16_t) __b);
822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
823 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
825 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
826 (int16x8_t) __b);
829 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
830 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
832 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
833 (int32x4_t) __b);
836 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
837 vaddw_s8 (int16x8_t __a, int8x8_t __b)
839 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
842 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
843 vaddw_s16 (int32x4_t __a, int16x4_t __b)
845 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
848 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
849 vaddw_s32 (int64x2_t __a, int32x2_t __b)
851 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
854 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
855 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
857 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
858 (int8x8_t) __b);
861 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
862 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
864 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
865 (int16x4_t) __b);
868 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
869 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
871 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
872 (int32x2_t) __b);
875 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
876 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
878 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
881 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
882 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
884 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
887 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
888 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
890 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
893 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
894 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
896 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
897 (int8x16_t) __b);
900 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
901 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
903 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
904 (int16x8_t) __b);
907 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
908 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
910 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
911 (int32x4_t) __b);
914 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
915 vhadd_s8 (int8x8_t __a, int8x8_t __b)
917 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
920 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
921 vhadd_s16 (int16x4_t __a, int16x4_t __b)
923 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
926 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
927 vhadd_s32 (int32x2_t __a, int32x2_t __b)
929 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
932 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
933 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
935 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
936 (int8x8_t) __b);
939 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
940 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
942 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
943 (int16x4_t) __b);
946 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
947 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
949 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
950 (int32x2_t) __b);
953 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
954 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
956 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
959 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
960 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
962 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
965 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
966 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
968 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
971 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
972 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
974 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
975 (int8x16_t) __b);
978 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
979 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
981 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
982 (int16x8_t) __b);
985 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
986 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
988 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
989 (int32x4_t) __b);
992 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
993 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
995 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
998 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
999 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
1001 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
1004 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1005 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
1007 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
1010 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1011 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
1013 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
1014 (int8x8_t) __b);
1017 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1018 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
1020 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1021 (int16x4_t) __b);
1024 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1025 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1027 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1028 (int32x2_t) __b);
1031 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1032 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1034 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1038 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1040 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1043 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1044 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1046 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1049 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1050 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1052 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1053 (int8x16_t) __b);
1056 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1057 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1059 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1060 (int16x8_t) __b);
1063 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1064 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1066 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1067 (int32x4_t) __b);
1070 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1071 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1073 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1076 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1077 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1079 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1082 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1083 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1085 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1088 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1089 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1091 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1092 (int16x8_t) __b);
1095 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1096 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1098 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1099 (int32x4_t) __b);
1102 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1103 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1105 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1106 (int64x2_t) __b);
1109 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1110 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1112 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1115 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1116 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1118 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1121 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1122 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1124 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1127 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1128 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1130 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1131 (int16x8_t) __b);
1134 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1135 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1137 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1138 (int32x4_t) __b);
1141 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1142 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1144 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1145 (int64x2_t) __b);
1148 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1149 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1151 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1154 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1155 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1157 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1160 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1161 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1163 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1166 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1167 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1169 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1170 (int16x8_t) __b,
1171 (int16x8_t) __c);
1174 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1175 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1177 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1178 (int32x4_t) __b,
1179 (int32x4_t) __c);
1182 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1183 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1185 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1186 (int64x2_t) __b,
1187 (int64x2_t) __c);
1190 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1191 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1193 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1196 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1197 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1199 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1202 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1203 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1205 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1208 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1209 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1211 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1212 (int16x8_t) __b,
1213 (int16x8_t) __c);
1216 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1217 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1219 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1220 (int32x4_t) __b,
1221 (int32x4_t) __c);
1224 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1225 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1227 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1228 (int64x2_t) __b,
1229 (int64x2_t) __c);
1232 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1233 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1235 return __a / __b;
1238 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1239 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1241 return __a / __b;
1244 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1245 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1247 return __a / __b;
1250 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1251 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1253 return __a / __b;
1256 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1257 vmul_s8 (int8x8_t __a, int8x8_t __b)
1259 return __a * __b;
1262 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1263 vmul_s16 (int16x4_t __a, int16x4_t __b)
1265 return __a * __b;
1268 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1269 vmul_s32 (int32x2_t __a, int32x2_t __b)
1271 return __a * __b;
1274 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1275 vmul_f32 (float32x2_t __a, float32x2_t __b)
1277 return __a * __b;
1280 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1281 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1283 return __a * __b;
1286 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1287 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1289 return __a * __b;
1292 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1293 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1295 return __a * __b;
1298 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1299 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1301 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1302 (int8x8_t) __b);
1305 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1306 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1308 return __a * __b;
1311 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1312 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1314 return __a * __b;
1317 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1318 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1320 return __a * __b;
1323 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1324 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1326 return __a * __b;
1329 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1330 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1332 return __a * __b;
1335 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1336 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1338 return __a * __b;
1341 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1342 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1344 return __a * __b;
1347 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1348 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1350 return __a * __b;
1353 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1354 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1356 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1357 (int8x16_t) __b);
1360 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1361 vand_s8 (int8x8_t __a, int8x8_t __b)
1363 return __a & __b;
1366 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1367 vand_s16 (int16x4_t __a, int16x4_t __b)
1369 return __a & __b;
1372 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1373 vand_s32 (int32x2_t __a, int32x2_t __b)
1375 return __a & __b;
1378 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1379 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1381 return __a & __b;
1384 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1385 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1387 return __a & __b;
1390 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1391 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1393 return __a & __b;
1396 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1397 vand_s64 (int64x1_t __a, int64x1_t __b)
1399 return __a & __b;
1402 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1403 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1405 return __a & __b;
1408 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1409 vandq_s8 (int8x16_t __a, int8x16_t __b)
1411 return __a & __b;
1414 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1415 vandq_s16 (int16x8_t __a, int16x8_t __b)
1417 return __a & __b;
1420 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1421 vandq_s32 (int32x4_t __a, int32x4_t __b)
1423 return __a & __b;
1426 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1427 vandq_s64 (int64x2_t __a, int64x2_t __b)
1429 return __a & __b;
1432 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1433 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1435 return __a & __b;
1438 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1439 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1441 return __a & __b;
1444 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1445 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1447 return __a & __b;
1450 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1451 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1453 return __a & __b;
1456 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1457 vorr_s8 (int8x8_t __a, int8x8_t __b)
1459 return __a | __b;
1462 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1463 vorr_s16 (int16x4_t __a, int16x4_t __b)
1465 return __a | __b;
1468 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1469 vorr_s32 (int32x2_t __a, int32x2_t __b)
1471 return __a | __b;
1474 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1475 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1477 return __a | __b;
1480 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1481 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1483 return __a | __b;
1486 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1487 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1489 return __a | __b;
1492 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1493 vorr_s64 (int64x1_t __a, int64x1_t __b)
1495 return __a | __b;
1498 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1499 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1501 return __a | __b;
1504 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1505 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1507 return __a | __b;
1510 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1511 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1513 return __a | __b;
1516 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1517 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1519 return __a | __b;
1522 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1523 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1525 return __a | __b;
1528 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1529 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1531 return __a | __b;
1534 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1535 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1537 return __a | __b;
1540 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1541 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1543 return __a | __b;
1546 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1547 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1549 return __a | __b;
1552 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1553 veor_s8 (int8x8_t __a, int8x8_t __b)
1555 return __a ^ __b;
1558 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1559 veor_s16 (int16x4_t __a, int16x4_t __b)
1561 return __a ^ __b;
1564 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1565 veor_s32 (int32x2_t __a, int32x2_t __b)
1567 return __a ^ __b;
1570 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1571 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1573 return __a ^ __b;
1576 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1577 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1579 return __a ^ __b;
1582 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1583 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1585 return __a ^ __b;
1588 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1589 veor_s64 (int64x1_t __a, int64x1_t __b)
1591 return __a ^ __b;
1594 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1595 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1597 return __a ^ __b;
1600 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1601 veorq_s8 (int8x16_t __a, int8x16_t __b)
1603 return __a ^ __b;
1606 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1607 veorq_s16 (int16x8_t __a, int16x8_t __b)
1609 return __a ^ __b;
1612 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1613 veorq_s32 (int32x4_t __a, int32x4_t __b)
1615 return __a ^ __b;
1618 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1619 veorq_s64 (int64x2_t __a, int64x2_t __b)
1621 return __a ^ __b;
1624 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1625 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1627 return __a ^ __b;
1630 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1631 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1633 return __a ^ __b;
1636 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1637 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1639 return __a ^ __b;
1642 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1643 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1645 return __a ^ __b;
1648 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1649 vbic_s8 (int8x8_t __a, int8x8_t __b)
1651 return __a & ~__b;
1654 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1655 vbic_s16 (int16x4_t __a, int16x4_t __b)
1657 return __a & ~__b;
1660 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1661 vbic_s32 (int32x2_t __a, int32x2_t __b)
1663 return __a & ~__b;
1666 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1667 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1669 return __a & ~__b;
1672 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1673 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1675 return __a & ~__b;
1678 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1679 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1681 return __a & ~__b;
1684 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1685 vbic_s64 (int64x1_t __a, int64x1_t __b)
1687 return __a & ~__b;
1690 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1691 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1693 return __a & ~__b;
1696 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1697 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1699 return __a & ~__b;
1702 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1703 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1705 return __a & ~__b;
1708 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1709 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1711 return __a & ~__b;
1714 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1715 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1717 return __a & ~__b;
1720 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1721 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1723 return __a & ~__b;
1726 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1727 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1729 return __a & ~__b;
1732 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1733 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1735 return __a & ~__b;
1738 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1739 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1741 return __a & ~__b;
1744 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1745 vorn_s8 (int8x8_t __a, int8x8_t __b)
1747 return __a | ~__b;
1750 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1751 vorn_s16 (int16x4_t __a, int16x4_t __b)
1753 return __a | ~__b;
1756 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1757 vorn_s32 (int32x2_t __a, int32x2_t __b)
1759 return __a | ~__b;
1762 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1763 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1765 return __a | ~__b;
1768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1769 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1771 return __a | ~__b;
1774 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1775 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1777 return __a | ~__b;
1780 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1781 vorn_s64 (int64x1_t __a, int64x1_t __b)
1783 return __a | ~__b;
1786 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1787 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1789 return __a | ~__b;
1792 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1793 vornq_s8 (int8x16_t __a, int8x16_t __b)
1795 return __a | ~__b;
1798 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1799 vornq_s16 (int16x8_t __a, int16x8_t __b)
1801 return __a | ~__b;
1804 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1805 vornq_s32 (int32x4_t __a, int32x4_t __b)
1807 return __a | ~__b;
1810 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1811 vornq_s64 (int64x2_t __a, int64x2_t __b)
1813 return __a | ~__b;
1816 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1817 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1819 return __a | ~__b;
1822 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1823 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1825 return __a | ~__b;
1828 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1829 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1831 return __a | ~__b;
1834 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1835 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1837 return __a | ~__b;
1840 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1841 vsub_s8 (int8x8_t __a, int8x8_t __b)
1843 return __a - __b;
1846 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1847 vsub_s16 (int16x4_t __a, int16x4_t __b)
1849 return __a - __b;
1852 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1853 vsub_s32 (int32x2_t __a, int32x2_t __b)
1855 return __a - __b;
1858 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1859 vsub_f32 (float32x2_t __a, float32x2_t __b)
1861 return __a - __b;
1864 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1865 vsub_f64 (float64x1_t __a, float64x1_t __b)
1867 return __a - __b;
1870 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1871 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1873 return __a - __b;
1876 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1877 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1879 return __a - __b;
1882 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1883 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1885 return __a - __b;
1888 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1889 vsub_s64 (int64x1_t __a, int64x1_t __b)
1891 return __a - __b;
1894 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1895 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1897 return __a - __b;
1900 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1901 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1903 return __a - __b;
1906 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1907 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1909 return __a - __b;
1912 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1913 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1915 return __a - __b;
1918 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1919 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1921 return __a - __b;
1924 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1925 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1927 return __a - __b;
1930 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1931 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1933 return __a - __b;
1936 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1937 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1939 return __a - __b;
1942 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1943 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1945 return __a - __b;
1948 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1949 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1951 return __a - __b;
1954 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1955 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1957 return __a - __b;
1960 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1961 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1963 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1966 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1967 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1969 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1972 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1973 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1975 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1978 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1979 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1981 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1982 (int8x8_t) __b);
1985 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1986 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1988 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1989 (int16x4_t) __b);
1992 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1993 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1995 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1996 (int32x2_t) __b);
1999 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2000 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
2002 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
2005 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2006 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
2008 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
2011 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2012 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
2014 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
2017 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2018 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
2020 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2021 (int8x16_t) __b);
2024 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2025 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2027 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2028 (int16x8_t) __b);
2031 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2032 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2034 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2035 (int32x4_t) __b);
2038 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2039 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2041 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2044 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2045 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2047 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2050 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2051 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2053 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2056 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2057 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2059 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2060 (int8x8_t) __b);
2063 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2064 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2066 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2067 (int16x4_t) __b);
2070 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2071 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2073 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2074 (int32x2_t) __b);
2077 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2078 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2080 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2083 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2084 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2086 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2089 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2090 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2092 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2095 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2096 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2098 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2099 (int8x16_t) __b);
2102 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2103 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2105 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2106 (int16x8_t) __b);
2109 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2110 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2112 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2113 (int32x4_t) __b);
2116 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2117 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2119 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2122 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2123 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2125 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2128 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2129 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2131 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2134 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2135 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2137 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2140 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2141 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2143 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2146 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2147 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2149 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2152 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2153 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2155 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2158 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2159 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2161 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2164 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2165 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2167 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2170 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2171 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2173 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2176 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2177 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2179 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2182 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2183 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2185 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2188 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2189 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2191 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2194 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2195 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2197 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2200 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2201 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2203 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2206 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2207 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2209 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
2212 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2213 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2215 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2218 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2219 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2221 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2224 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2225 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2227 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2230 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2231 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2233 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2236 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2237 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2239 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2242 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2243 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2245 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2248 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2249 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2251 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2254 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2255 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2257 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2260 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2261 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2263 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2266 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2267 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2269 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2272 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2273 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2275 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2278 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2279 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2281 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2284 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2285 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2287 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2290 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2291 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2293 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2296 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2297 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2299 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2302 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2303 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2305 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
2308 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2309 vqneg_s8 (int8x8_t __a)
2311 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2314 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2315 vqneg_s16 (int16x4_t __a)
2317 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2320 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2321 vqneg_s32 (int32x2_t __a)
2323 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2326 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2327 vqneg_s64 (int64x1_t __a)
2329 return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
2332 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2333 vqnegq_s8 (int8x16_t __a)
2335 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2338 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2339 vqnegq_s16 (int16x8_t __a)
2341 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2344 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2345 vqnegq_s32 (int32x4_t __a)
2347 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2350 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2351 vqabs_s8 (int8x8_t __a)
2353 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2356 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2357 vqabs_s16 (int16x4_t __a)
2359 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2362 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2363 vqabs_s32 (int32x2_t __a)
2365 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2368 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2369 vqabs_s64 (int64x1_t __a)
2371 return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
2374 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2375 vqabsq_s8 (int8x16_t __a)
2377 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2380 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2381 vqabsq_s16 (int16x8_t __a)
2383 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2386 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2387 vqabsq_s32 (int32x4_t __a)
2389 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2392 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2393 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2395 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2398 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2399 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2401 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2404 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2405 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2407 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2410 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2411 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2413 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2416 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2417 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2419 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2422 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2423 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2425 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2428 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2429 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2431 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2434 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2435 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2437 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2440 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2441 vcreate_s8 (uint64_t __a)
2443 return (int8x8_t) __a;
2446 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2447 vcreate_s16 (uint64_t __a)
2449 return (int16x4_t) __a;
2452 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2453 vcreate_s32 (uint64_t __a)
2455 return (int32x2_t) __a;
2458 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2459 vcreate_s64 (uint64_t __a)
2461 return (int64x1_t) {__a};
2464 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2465 vcreate_f32 (uint64_t __a)
2467 return (float32x2_t) __a;
2470 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2471 vcreate_u8 (uint64_t __a)
2473 return (uint8x8_t) __a;
2476 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2477 vcreate_u16 (uint64_t __a)
2479 return (uint16x4_t) __a;
2482 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2483 vcreate_u32 (uint64_t __a)
2485 return (uint32x2_t) __a;
2488 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2489 vcreate_u64 (uint64_t __a)
2491 return (uint64x1_t) {__a};
2494 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2495 vcreate_f64 (uint64_t __a)
2497 return __builtin_aarch64_createv1df (__a);
2500 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2501 vcreate_p8 (uint64_t __a)
2503 return (poly8x8_t) __a;
2506 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2507 vcreate_p16 (uint64_t __a)
2509 return (poly16x4_t) __a;
2512 /* vget_lane */
2514 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2515 vget_lane_f32 (float32x2_t __a, const int __b)
2517 return __aarch64_vget_lane_f32 (__a, __b);
2520 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2521 vget_lane_f64 (float64x1_t __a, const int __b)
2523 return __aarch64_vget_lane_f64 (__a, __b);
2526 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2527 vget_lane_p8 (poly8x8_t __a, const int __b)
2529 return __aarch64_vget_lane_p8 (__a, __b);
2532 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2533 vget_lane_p16 (poly16x4_t __a, const int __b)
2535 return __aarch64_vget_lane_p16 (__a, __b);
2538 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2539 vget_lane_s8 (int8x8_t __a, const int __b)
2541 return __aarch64_vget_lane_s8 (__a, __b);
2544 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2545 vget_lane_s16 (int16x4_t __a, const int __b)
2547 return __aarch64_vget_lane_s16 (__a, __b);
2550 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2551 vget_lane_s32 (int32x2_t __a, const int __b)
2553 return __aarch64_vget_lane_s32 (__a, __b);
2556 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2557 vget_lane_s64 (int64x1_t __a, const int __b)
2559 return __aarch64_vget_lane_s64 (__a, __b);
2562 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2563 vget_lane_u8 (uint8x8_t __a, const int __b)
2565 return __aarch64_vget_lane_u8 (__a, __b);
2568 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2569 vget_lane_u16 (uint16x4_t __a, const int __b)
2571 return __aarch64_vget_lane_u16 (__a, __b);
2574 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2575 vget_lane_u32 (uint32x2_t __a, const int __b)
2577 return __aarch64_vget_lane_u32 (__a, __b);
2580 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2581 vget_lane_u64 (uint64x1_t __a, const int __b)
2583 return __aarch64_vget_lane_u64 (__a, __b);
2586 /* vgetq_lane */
2588 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2589 vgetq_lane_f32 (float32x4_t __a, const int __b)
2591 return __aarch64_vgetq_lane_f32 (__a, __b);
2594 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2595 vgetq_lane_f64 (float64x2_t __a, const int __b)
2597 return __aarch64_vgetq_lane_f64 (__a, __b);
2600 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2601 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2603 return __aarch64_vgetq_lane_p8 (__a, __b);
2606 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2607 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2609 return __aarch64_vgetq_lane_p16 (__a, __b);
2612 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2613 vgetq_lane_s8 (int8x16_t __a, const int __b)
2615 return __aarch64_vgetq_lane_s8 (__a, __b);
2618 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2619 vgetq_lane_s16 (int16x8_t __a, const int __b)
2621 return __aarch64_vgetq_lane_s16 (__a, __b);
2624 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2625 vgetq_lane_s32 (int32x4_t __a, const int __b)
2627 return __aarch64_vgetq_lane_s32 (__a, __b);
2630 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2631 vgetq_lane_s64 (int64x2_t __a, const int __b)
2633 return __aarch64_vgetq_lane_s64 (__a, __b);
2636 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2637 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2639 return __aarch64_vgetq_lane_u8 (__a, __b);
2642 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2643 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2645 return __aarch64_vgetq_lane_u16 (__a, __b);
2648 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2649 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2651 return __aarch64_vgetq_lane_u32 (__a, __b);
2654 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2655 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2657 return __aarch64_vgetq_lane_u64 (__a, __b);
2660 /* vreinterpret */
2662 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2663 vreinterpret_p8_f64 (float64x1_t __a)
2665 return __builtin_aarch64_reinterpretv8qiv1df_ps (__a);
2668 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2669 vreinterpret_p8_s8 (int8x8_t __a)
2671 return (poly8x8_t) __a;
2674 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2675 vreinterpret_p8_s16 (int16x4_t __a)
2677 return (poly8x8_t) __a;
2680 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2681 vreinterpret_p8_s32 (int32x2_t __a)
2683 return (poly8x8_t) __a;
2686 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2687 vreinterpret_p8_s64 (int64x1_t __a)
2689 return (poly8x8_t) __a;
2692 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2693 vreinterpret_p8_f32 (float32x2_t __a)
2695 return (poly8x8_t) __a;
2698 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2699 vreinterpret_p8_u8 (uint8x8_t __a)
2701 return (poly8x8_t) __a;
2704 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2705 vreinterpret_p8_u16 (uint16x4_t __a)
2707 return (poly8x8_t) __a;
2710 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2711 vreinterpret_p8_u32 (uint32x2_t __a)
2713 return (poly8x8_t) __a;
2716 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2717 vreinterpret_p8_u64 (uint64x1_t __a)
2719 return (poly8x8_t) __a;
2722 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2723 vreinterpret_p8_p16 (poly16x4_t __a)
2725 return (poly8x8_t) __a;
2728 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2729 vreinterpretq_p8_f64 (float64x2_t __a)
2731 return (poly8x16_t) __a;
2734 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2735 vreinterpretq_p8_s8 (int8x16_t __a)
2737 return (poly8x16_t) __a;
2740 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2741 vreinterpretq_p8_s16 (int16x8_t __a)
2743 return (poly8x16_t) __a;
2746 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2747 vreinterpretq_p8_s32 (int32x4_t __a)
2749 return (poly8x16_t) __a;
2752 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2753 vreinterpretq_p8_s64 (int64x2_t __a)
2755 return (poly8x16_t) __a;
2758 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2759 vreinterpretq_p8_f32 (float32x4_t __a)
2761 return (poly8x16_t) __a;
2764 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2765 vreinterpretq_p8_u8 (uint8x16_t __a)
2767 return (poly8x16_t) __a;
2770 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2771 vreinterpretq_p8_u16 (uint16x8_t __a)
2773 return (poly8x16_t) __a;
2776 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2777 vreinterpretq_p8_u32 (uint32x4_t __a)
2779 return (poly8x16_t) __a;
2782 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2783 vreinterpretq_p8_u64 (uint64x2_t __a)
2785 return (poly8x16_t) __a;
2788 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2789 vreinterpretq_p8_p16 (poly16x8_t __a)
2791 return (poly8x16_t) __a;
2794 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2795 vreinterpret_p16_f64 (float64x1_t __a)
2797 return __builtin_aarch64_reinterpretv4hiv1df_ps (__a);
2800 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2801 vreinterpret_p16_s8 (int8x8_t __a)
2803 return (poly16x4_t) __a;
2806 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2807 vreinterpret_p16_s16 (int16x4_t __a)
2809 return (poly16x4_t) __a;
2812 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2813 vreinterpret_p16_s32 (int32x2_t __a)
2815 return (poly16x4_t) __a;
2818 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2819 vreinterpret_p16_s64 (int64x1_t __a)
2821 return (poly16x4_t) __a;
2824 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2825 vreinterpret_p16_f32 (float32x2_t __a)
2827 return (poly16x4_t) __a;
2830 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2831 vreinterpret_p16_u8 (uint8x8_t __a)
2833 return (poly16x4_t) __a;
2836 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2837 vreinterpret_p16_u16 (uint16x4_t __a)
2839 return (poly16x4_t) __a;
2842 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2843 vreinterpret_p16_u32 (uint32x2_t __a)
2845 return (poly16x4_t) __a;
2848 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2849 vreinterpret_p16_u64 (uint64x1_t __a)
2851 return (poly16x4_t) __a;
2854 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2855 vreinterpret_p16_p8 (poly8x8_t __a)
2857 return (poly16x4_t) __a;
2860 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2861 vreinterpretq_p16_f64 (float64x2_t __a)
2863 return (poly16x8_t) __a;
2866 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2867 vreinterpretq_p16_s8 (int8x16_t __a)
2869 return (poly16x8_t) __a;
2872 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2873 vreinterpretq_p16_s16 (int16x8_t __a)
2875 return (poly16x8_t) __a;
2878 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2879 vreinterpretq_p16_s32 (int32x4_t __a)
2881 return (poly16x8_t) __a;
2884 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2885 vreinterpretq_p16_s64 (int64x2_t __a)
2887 return (poly16x8_t) __a;
2890 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2891 vreinterpretq_p16_f32 (float32x4_t __a)
2893 return (poly16x8_t) __a;
2896 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2897 vreinterpretq_p16_u8 (uint8x16_t __a)
2899 return (poly16x8_t) __a;
2902 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2903 vreinterpretq_p16_u16 (uint16x8_t __a)
2905 return (poly16x8_t) __a;
2908 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2909 vreinterpretq_p16_u32 (uint32x4_t __a)
2911 return (poly16x8_t) __a;
2914 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2915 vreinterpretq_p16_u64 (uint64x2_t __a)
2917 return (poly16x8_t) __a;
2920 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2921 vreinterpretq_p16_p8 (poly8x16_t __a)
2923 return (poly16x8_t) __a;
2926 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2927 vreinterpret_f32_f64 (float64x1_t __a)
2929 return __builtin_aarch64_reinterpretv2sfv1df (__a);
2932 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2933 vreinterpret_f32_s8 (int8x8_t __a)
2935 return (float32x2_t) __a;
2938 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2939 vreinterpret_f32_s16 (int16x4_t __a)
2941 return (float32x2_t) __a;
2944 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2945 vreinterpret_f32_s32 (int32x2_t __a)
2947 return (float32x2_t) __a;
2950 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2951 vreinterpret_f32_s64 (int64x1_t __a)
2953 return (float32x2_t) __a;
2956 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2957 vreinterpret_f32_u8 (uint8x8_t __a)
2959 return (float32x2_t) __a;
2962 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2963 vreinterpret_f32_u16 (uint16x4_t __a)
2965 return (float32x2_t) __a;
2968 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2969 vreinterpret_f32_u32 (uint32x2_t __a)
2971 return (float32x2_t) __a;
2974 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2975 vreinterpret_f32_u64 (uint64x1_t __a)
2977 return (float32x2_t) __a;
2980 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2981 vreinterpret_f32_p8 (poly8x8_t __a)
2983 return (float32x2_t) __a;
2986 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2987 vreinterpret_f32_p16 (poly16x4_t __a)
2989 return (float32x2_t) __a;
2992 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2993 vreinterpretq_f32_f64 (float64x2_t __a)
2995 return (float32x4_t) __a;
2998 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2999 vreinterpretq_f32_s8 (int8x16_t __a)
3001 return (float32x4_t) __a;
3004 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3005 vreinterpretq_f32_s16 (int16x8_t __a)
3007 return (float32x4_t) __a;
3010 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3011 vreinterpretq_f32_s32 (int32x4_t __a)
3013 return (float32x4_t) __a;
3016 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3017 vreinterpretq_f32_s64 (int64x2_t __a)
3019 return (float32x4_t) __a;
3022 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3023 vreinterpretq_f32_u8 (uint8x16_t __a)
3025 return (float32x4_t) __a;
3028 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3029 vreinterpretq_f32_u16 (uint16x8_t __a)
3031 return (float32x4_t) __a;
3034 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3035 vreinterpretq_f32_u32 (uint32x4_t __a)
3037 return (float32x4_t) __a;
3040 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3041 vreinterpretq_f32_u64 (uint64x2_t __a)
3043 return (float32x4_t) __a;
3046 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3047 vreinterpretq_f32_p8 (poly8x16_t __a)
3049 return (float32x4_t) __a;
3052 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3053 vreinterpretq_f32_p16 (poly16x8_t __a)
3055 return (float32x4_t) __a;
3058 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3059 vreinterpret_f64_f32 (float32x2_t __a)
3061 return __builtin_aarch64_reinterpretv1dfv2sf (__a);
3064 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3065 vreinterpret_f64_p8 (poly8x8_t __a)
3067 return __builtin_aarch64_reinterpretv1dfv8qi_sp (__a);
3070 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3071 vreinterpret_f64_p16 (poly16x4_t __a)
3073 return __builtin_aarch64_reinterpretv1dfv4hi_sp (__a);
3076 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3077 vreinterpret_f64_s8 (int8x8_t __a)
3079 return __builtin_aarch64_reinterpretv1dfv8qi (__a);
3082 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3083 vreinterpret_f64_s16 (int16x4_t __a)
3085 return __builtin_aarch64_reinterpretv1dfv4hi (__a);
3088 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3089 vreinterpret_f64_s32 (int32x2_t __a)
3091 return __builtin_aarch64_reinterpretv1dfv2si (__a);
3094 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3095 vreinterpret_f64_s64 (int64x1_t __a)
3097 return __builtin_aarch64_createv1df ((uint64_t) vget_lane_s64 (__a, 0));
3100 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3101 vreinterpret_f64_u8 (uint8x8_t __a)
3103 return __builtin_aarch64_reinterpretv1dfv8qi_su (__a);
3106 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3107 vreinterpret_f64_u16 (uint16x4_t __a)
3109 return __builtin_aarch64_reinterpretv1dfv4hi_su (__a);
3112 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3113 vreinterpret_f64_u32 (uint32x2_t __a)
3115 return __builtin_aarch64_reinterpretv1dfv2si_su (__a);
3118 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3119 vreinterpret_f64_u64 (uint64x1_t __a)
3121 return __builtin_aarch64_createv1df (vget_lane_u64 (__a, 0));
3124 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3125 vreinterpretq_f64_f32 (float32x4_t __a)
3127 return (float64x2_t) __a;
3130 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3131 vreinterpretq_f64_p8 (poly8x16_t __a)
3133 return (float64x2_t) __a;
3136 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3137 vreinterpretq_f64_p16 (poly16x8_t __a)
3139 return (float64x2_t) __a;
3142 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3143 vreinterpretq_f64_s8 (int8x16_t __a)
3145 return (float64x2_t) __a;
3148 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3149 vreinterpretq_f64_s16 (int16x8_t __a)
3151 return (float64x2_t) __a;
3154 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3155 vreinterpretq_f64_s32 (int32x4_t __a)
3157 return (float64x2_t) __a;
3160 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3161 vreinterpretq_f64_s64 (int64x2_t __a)
3163 return (float64x2_t) __a;
3166 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3167 vreinterpretq_f64_u8 (uint8x16_t __a)
3169 return (float64x2_t) __a;
3172 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3173 vreinterpretq_f64_u16 (uint16x8_t __a)
3175 return (float64x2_t) __a;
3178 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3179 vreinterpretq_f64_u32 (uint32x4_t __a)
3181 return (float64x2_t) __a;
3184 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3185 vreinterpretq_f64_u64 (uint64x2_t __a)
3187 return (float64x2_t) __a;
3190 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3191 vreinterpret_s64_f64 (float64x1_t __a)
3193 return (int64x1_t) {__builtin_aarch64_reinterpretdiv1df (__a)};
3196 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3197 vreinterpret_s64_s8 (int8x8_t __a)
3199 return (int64x1_t) __a;
3202 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3203 vreinterpret_s64_s16 (int16x4_t __a)
3205 return (int64x1_t) __a;
3208 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3209 vreinterpret_s64_s32 (int32x2_t __a)
3211 return (int64x1_t) __a;
3214 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3215 vreinterpret_s64_f32 (float32x2_t __a)
3217 return (int64x1_t) __a;
3220 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3221 vreinterpret_s64_u8 (uint8x8_t __a)
3223 return (int64x1_t) __a;
3226 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3227 vreinterpret_s64_u16 (uint16x4_t __a)
3229 return (int64x1_t) __a;
3232 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3233 vreinterpret_s64_u32 (uint32x2_t __a)
3235 return (int64x1_t) __a;
3238 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3239 vreinterpret_s64_u64 (uint64x1_t __a)
3241 return (int64x1_t) __a;
3244 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3245 vreinterpret_s64_p8 (poly8x8_t __a)
3247 return (int64x1_t) __a;
3250 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3251 vreinterpret_s64_p16 (poly16x4_t __a)
3253 return (int64x1_t) __a;
3256 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3257 vreinterpretq_s64_f64 (float64x2_t __a)
3259 return (int64x2_t) __a;
3262 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3263 vreinterpretq_s64_s8 (int8x16_t __a)
3265 return (int64x2_t) __a;
3268 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3269 vreinterpretq_s64_s16 (int16x8_t __a)
3271 return (int64x2_t) __a;
3274 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3275 vreinterpretq_s64_s32 (int32x4_t __a)
3277 return (int64x2_t) __a;
3280 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3281 vreinterpretq_s64_f32 (float32x4_t __a)
3283 return (int64x2_t) __a;
3286 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3287 vreinterpretq_s64_u8 (uint8x16_t __a)
3289 return (int64x2_t) __a;
3292 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3293 vreinterpretq_s64_u16 (uint16x8_t __a)
3295 return (int64x2_t) __a;
3298 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3299 vreinterpretq_s64_u32 (uint32x4_t __a)
3301 return (int64x2_t) __a;
3304 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3305 vreinterpretq_s64_u64 (uint64x2_t __a)
3307 return (int64x2_t) __a;
3310 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3311 vreinterpretq_s64_p8 (poly8x16_t __a)
3313 return (int64x2_t) __a;
3316 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3317 vreinterpretq_s64_p16 (poly16x8_t __a)
3319 return (int64x2_t) __a;
3322 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3323 vreinterpret_u64_f64 (float64x1_t __a)
3325 return (uint64x1_t) {__builtin_aarch64_reinterpretdiv1df_us (__a)};
3328 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3329 vreinterpret_u64_s8 (int8x8_t __a)
3331 return (uint64x1_t) __a;
3334 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3335 vreinterpret_u64_s16 (int16x4_t __a)
3337 return (uint64x1_t) __a;
3340 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3341 vreinterpret_u64_s32 (int32x2_t __a)
3343 return (uint64x1_t) __a;
3346 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3347 vreinterpret_u64_s64 (int64x1_t __a)
3349 return (uint64x1_t) __a;
3352 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3353 vreinterpret_u64_f32 (float32x2_t __a)
3355 return (uint64x1_t) __a;
3358 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3359 vreinterpret_u64_u8 (uint8x8_t __a)
3361 return (uint64x1_t) __a;
3364 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3365 vreinterpret_u64_u16 (uint16x4_t __a)
3367 return (uint64x1_t) __a;
3370 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3371 vreinterpret_u64_u32 (uint32x2_t __a)
3373 return (uint64x1_t) __a;
3376 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3377 vreinterpret_u64_p8 (poly8x8_t __a)
3379 return (uint64x1_t) __a;
3382 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3383 vreinterpret_u64_p16 (poly16x4_t __a)
3385 return (uint64x1_t) __a;
3388 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3389 vreinterpretq_u64_f64 (float64x2_t __a)
3391 return (uint64x2_t) __a;
3394 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3395 vreinterpretq_u64_s8 (int8x16_t __a)
3397 return (uint64x2_t) __a;
3400 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3401 vreinterpretq_u64_s16 (int16x8_t __a)
3403 return (uint64x2_t) __a;
3406 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3407 vreinterpretq_u64_s32 (int32x4_t __a)
3409 return (uint64x2_t) __a;
3412 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3413 vreinterpretq_u64_s64 (int64x2_t __a)
3415 return (uint64x2_t) __a;
3418 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3419 vreinterpretq_u64_f32 (float32x4_t __a)
3421 return (uint64x2_t) __a;
3424 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3425 vreinterpretq_u64_u8 (uint8x16_t __a)
3427 return (uint64x2_t) __a;
3430 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3431 vreinterpretq_u64_u16 (uint16x8_t __a)
3433 return (uint64x2_t) __a;
3436 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3437 vreinterpretq_u64_u32 (uint32x4_t __a)
3439 return (uint64x2_t) __a;
3442 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3443 vreinterpretq_u64_p8 (poly8x16_t __a)
3445 return (uint64x2_t) __a;
3448 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3449 vreinterpretq_u64_p16 (poly16x8_t __a)
3451 return (uint64x2_t) __a;
3454 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3455 vreinterpret_s8_f64 (float64x1_t __a)
3457 return __builtin_aarch64_reinterpretv8qiv1df (__a);
3460 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3461 vreinterpret_s8_s16 (int16x4_t __a)
3463 return (int8x8_t) __a;
3466 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3467 vreinterpret_s8_s32 (int32x2_t __a)
3469 return (int8x8_t) __a;
3472 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3473 vreinterpret_s8_s64 (int64x1_t __a)
3475 return (int8x8_t) __a;
3478 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3479 vreinterpret_s8_f32 (float32x2_t __a)
3481 return (int8x8_t) __a;
3484 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3485 vreinterpret_s8_u8 (uint8x8_t __a)
3487 return (int8x8_t) __a;
3490 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3491 vreinterpret_s8_u16 (uint16x4_t __a)
3493 return (int8x8_t) __a;
3496 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3497 vreinterpret_s8_u32 (uint32x2_t __a)
3499 return (int8x8_t) __a;
3502 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3503 vreinterpret_s8_u64 (uint64x1_t __a)
3505 return (int8x8_t) __a;
3508 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3509 vreinterpret_s8_p8 (poly8x8_t __a)
3511 return (int8x8_t) __a;
3514 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3515 vreinterpret_s8_p16 (poly16x4_t __a)
3517 return (int8x8_t) __a;
3520 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3521 vreinterpretq_s8_f64 (float64x2_t __a)
3523 return (int8x16_t) __a;
3526 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3527 vreinterpretq_s8_s16 (int16x8_t __a)
3529 return (int8x16_t) __a;
3532 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3533 vreinterpretq_s8_s32 (int32x4_t __a)
3535 return (int8x16_t) __a;
3538 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3539 vreinterpretq_s8_s64 (int64x2_t __a)
3541 return (int8x16_t) __a;
3544 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3545 vreinterpretq_s8_f32 (float32x4_t __a)
3547 return (int8x16_t) __a;
3550 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3551 vreinterpretq_s8_u8 (uint8x16_t __a)
3553 return (int8x16_t) __a;
3556 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3557 vreinterpretq_s8_u16 (uint16x8_t __a)
3559 return (int8x16_t) __a;
3562 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3563 vreinterpretq_s8_u32 (uint32x4_t __a)
3565 return (int8x16_t) __a;
3568 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3569 vreinterpretq_s8_u64 (uint64x2_t __a)
3571 return (int8x16_t) __a;
3574 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3575 vreinterpretq_s8_p8 (poly8x16_t __a)
3577 return (int8x16_t) __a;
3580 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3581 vreinterpretq_s8_p16 (poly16x8_t __a)
3583 return (int8x16_t) __a;
3586 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3587 vreinterpret_s16_f64 (float64x1_t __a)
3589 return __builtin_aarch64_reinterpretv4hiv1df (__a);
3592 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3593 vreinterpret_s16_s8 (int8x8_t __a)
3595 return (int16x4_t) __a;
3598 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3599 vreinterpret_s16_s32 (int32x2_t __a)
3601 return (int16x4_t) __a;
3604 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3605 vreinterpret_s16_s64 (int64x1_t __a)
3607 return (int16x4_t) __a;
3610 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3611 vreinterpret_s16_f32 (float32x2_t __a)
3613 return (int16x4_t) __a;
3616 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3617 vreinterpret_s16_u8 (uint8x8_t __a)
3619 return (int16x4_t) __a;
3622 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3623 vreinterpret_s16_u16 (uint16x4_t __a)
3625 return (int16x4_t) __a;
3628 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3629 vreinterpret_s16_u32 (uint32x2_t __a)
3631 return (int16x4_t) __a;
3634 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3635 vreinterpret_s16_u64 (uint64x1_t __a)
3637 return (int16x4_t) __a;
3640 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3641 vreinterpret_s16_p8 (poly8x8_t __a)
3643 return (int16x4_t) __a;
3646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3647 vreinterpret_s16_p16 (poly16x4_t __a)
3649 return (int16x4_t) __a;
3652 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3653 vreinterpretq_s16_f64 (float64x2_t __a)
3655 return (int16x8_t) __a;
3658 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3659 vreinterpretq_s16_s8 (int8x16_t __a)
3661 return (int16x8_t) __a;
3664 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3665 vreinterpretq_s16_s32 (int32x4_t __a)
3667 return (int16x8_t) __a;
3670 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3671 vreinterpretq_s16_s64 (int64x2_t __a)
3673 return (int16x8_t) __a;
3676 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3677 vreinterpretq_s16_f32 (float32x4_t __a)
3679 return (int16x8_t) __a;
3682 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3683 vreinterpretq_s16_u8 (uint8x16_t __a)
3685 return (int16x8_t) __a;
3688 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3689 vreinterpretq_s16_u16 (uint16x8_t __a)
3691 return (int16x8_t) __a;
3694 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3695 vreinterpretq_s16_u32 (uint32x4_t __a)
3697 return (int16x8_t) __a;
3700 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3701 vreinterpretq_s16_u64 (uint64x2_t __a)
3703 return (int16x8_t) __a;
3706 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3707 vreinterpretq_s16_p8 (poly8x16_t __a)
3709 return (int16x8_t) __a;
3712 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3713 vreinterpretq_s16_p16 (poly16x8_t __a)
3715 return (int16x8_t) __a;
3718 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3719 vreinterpret_s32_f64 (float64x1_t __a)
3721 return __builtin_aarch64_reinterpretv2siv1df (__a);
3724 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3725 vreinterpret_s32_s8 (int8x8_t __a)
3727 return (int32x2_t) __a;
3730 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3731 vreinterpret_s32_s16 (int16x4_t __a)
3733 return (int32x2_t) __a;
3736 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3737 vreinterpret_s32_s64 (int64x1_t __a)
3739 return (int32x2_t) __a;
3742 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3743 vreinterpret_s32_f32 (float32x2_t __a)
3745 return (int32x2_t) __a;
3748 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3749 vreinterpret_s32_u8 (uint8x8_t __a)
3751 return (int32x2_t) __a;
3754 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3755 vreinterpret_s32_u16 (uint16x4_t __a)
3757 return (int32x2_t) __a;
3760 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3761 vreinterpret_s32_u32 (uint32x2_t __a)
3763 return (int32x2_t) __a;
3766 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3767 vreinterpret_s32_u64 (uint64x1_t __a)
3769 return (int32x2_t) __a;
3772 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3773 vreinterpret_s32_p8 (poly8x8_t __a)
3775 return (int32x2_t) __a;
3778 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3779 vreinterpret_s32_p16 (poly16x4_t __a)
3781 return (int32x2_t) __a;
3784 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3785 vreinterpretq_s32_f64 (float64x2_t __a)
3787 return (int32x4_t) __a;
3790 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3791 vreinterpretq_s32_s8 (int8x16_t __a)
3793 return (int32x4_t) __a;
3796 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3797 vreinterpretq_s32_s16 (int16x8_t __a)
3799 return (int32x4_t) __a;
3802 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3803 vreinterpretq_s32_s64 (int64x2_t __a)
3805 return (int32x4_t) __a;
3808 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3809 vreinterpretq_s32_f32 (float32x4_t __a)
3811 return (int32x4_t) __a;
3814 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3815 vreinterpretq_s32_u8 (uint8x16_t __a)
3817 return (int32x4_t) __a;
3820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3821 vreinterpretq_s32_u16 (uint16x8_t __a)
3823 return (int32x4_t) __a;
3826 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3827 vreinterpretq_s32_u32 (uint32x4_t __a)
3829 return (int32x4_t) __a;
3832 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3833 vreinterpretq_s32_u64 (uint64x2_t __a)
3835 return (int32x4_t) __a;
3838 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3839 vreinterpretq_s32_p8 (poly8x16_t __a)
3841 return (int32x4_t) __a;
3844 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3845 vreinterpretq_s32_p16 (poly16x8_t __a)
3847 return (int32x4_t) __a;
3850 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3851 vreinterpret_u8_f64 (float64x1_t __a)
3853 return __builtin_aarch64_reinterpretv8qiv1df_us (__a);
3856 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3857 vreinterpret_u8_s8 (int8x8_t __a)
3859 return (uint8x8_t) __a;
3862 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3863 vreinterpret_u8_s16 (int16x4_t __a)
3865 return (uint8x8_t) __a;
3868 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3869 vreinterpret_u8_s32 (int32x2_t __a)
3871 return (uint8x8_t) __a;
3874 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3875 vreinterpret_u8_s64 (int64x1_t __a)
3877 return (uint8x8_t) __a;
3880 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3881 vreinterpret_u8_f32 (float32x2_t __a)
3883 return (uint8x8_t) __a;
3886 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3887 vreinterpret_u8_u16 (uint16x4_t __a)
3889 return (uint8x8_t) __a;
3892 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3893 vreinterpret_u8_u32 (uint32x2_t __a)
3895 return (uint8x8_t) __a;
3898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3899 vreinterpret_u8_u64 (uint64x1_t __a)
3901 return (uint8x8_t) __a;
3904 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3905 vreinterpret_u8_p8 (poly8x8_t __a)
3907 return (uint8x8_t) __a;
3910 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3911 vreinterpret_u8_p16 (poly16x4_t __a)
3913 return (uint8x8_t) __a;
3916 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3917 vreinterpretq_u8_f64 (float64x2_t __a)
3919 return (uint8x16_t) __a;
3922 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3923 vreinterpretq_u8_s8 (int8x16_t __a)
3925 return (uint8x16_t) __a;
3928 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3929 vreinterpretq_u8_s16 (int16x8_t __a)
3931 return (uint8x16_t) __a;
3934 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3935 vreinterpretq_u8_s32 (int32x4_t __a)
3937 return (uint8x16_t) __a;
3940 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3941 vreinterpretq_u8_s64 (int64x2_t __a)
3943 return (uint8x16_t) __a;
3946 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3947 vreinterpretq_u8_f32 (float32x4_t __a)
3949 return (uint8x16_t) __a;
3952 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3953 vreinterpretq_u8_u16 (uint16x8_t __a)
3955 return (uint8x16_t) __a;
3958 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3959 vreinterpretq_u8_u32 (uint32x4_t __a)
3961 return (uint8x16_t) __a;
3964 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3965 vreinterpretq_u8_u64 (uint64x2_t __a)
3967 return (uint8x16_t) __a;
3970 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3971 vreinterpretq_u8_p8 (poly8x16_t __a)
3973 return (uint8x16_t) __a;
3976 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3977 vreinterpretq_u8_p16 (poly16x8_t __a)
3979 return (uint8x16_t) __a;
3982 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3983 vreinterpret_u16_f64 (float64x1_t __a)
3985 return __builtin_aarch64_reinterpretv4hiv1df_us (__a);
3988 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3989 vreinterpret_u16_s8 (int8x8_t __a)
3991 return (uint16x4_t) __a;
3994 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3995 vreinterpret_u16_s16 (int16x4_t __a)
3997 return (uint16x4_t) __a;
4000 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4001 vreinterpret_u16_s32 (int32x2_t __a)
4003 return (uint16x4_t) __a;
4006 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4007 vreinterpret_u16_s64 (int64x1_t __a)
4009 return (uint16x4_t) __a;
4012 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4013 vreinterpret_u16_f32 (float32x2_t __a)
4015 return (uint16x4_t) __a;
4018 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4019 vreinterpret_u16_u8 (uint8x8_t __a)
4021 return (uint16x4_t) __a;
4024 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4025 vreinterpret_u16_u32 (uint32x2_t __a)
4027 return (uint16x4_t) __a;
4030 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4031 vreinterpret_u16_u64 (uint64x1_t __a)
4033 return (uint16x4_t) __a;
4036 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4037 vreinterpret_u16_p8 (poly8x8_t __a)
4039 return (uint16x4_t) __a;
4042 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4043 vreinterpret_u16_p16 (poly16x4_t __a)
4045 return (uint16x4_t) __a;
4048 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4049 vreinterpretq_u16_f64 (float64x2_t __a)
4051 return (uint16x8_t) __a;
4054 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4055 vreinterpretq_u16_s8 (int8x16_t __a)
4057 return (uint16x8_t) __a;
4060 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4061 vreinterpretq_u16_s16 (int16x8_t __a)
4063 return (uint16x8_t) __a;
4066 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4067 vreinterpretq_u16_s32 (int32x4_t __a)
4069 return (uint16x8_t) __a;
4072 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4073 vreinterpretq_u16_s64 (int64x2_t __a)
4075 return (uint16x8_t) __a;
4078 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4079 vreinterpretq_u16_f32 (float32x4_t __a)
4081 return (uint16x8_t) __a;
4084 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4085 vreinterpretq_u16_u8 (uint8x16_t __a)
4087 return (uint16x8_t) __a;
4090 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4091 vreinterpretq_u16_u32 (uint32x4_t __a)
4093 return (uint16x8_t) __a;
4096 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4097 vreinterpretq_u16_u64 (uint64x2_t __a)
4099 return (uint16x8_t) __a;
4102 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4103 vreinterpretq_u16_p8 (poly8x16_t __a)
4105 return (uint16x8_t) __a;
4108 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4109 vreinterpretq_u16_p16 (poly16x8_t __a)
4111 return (uint16x8_t) __a;
4114 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4115 vreinterpret_u32_f64 (float64x1_t __a)
4117 return __builtin_aarch64_reinterpretv2siv1df_us (__a);
4120 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4121 vreinterpret_u32_s8 (int8x8_t __a)
4123 return (uint32x2_t) __a;
4126 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4127 vreinterpret_u32_s16 (int16x4_t __a)
4129 return (uint32x2_t) __a;
4132 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4133 vreinterpret_u32_s32 (int32x2_t __a)
4135 return (uint32x2_t) __a;
4138 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4139 vreinterpret_u32_s64 (int64x1_t __a)
4141 return (uint32x2_t) __a;
4144 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4145 vreinterpret_u32_f32 (float32x2_t __a)
4147 return (uint32x2_t) __a;
4150 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4151 vreinterpret_u32_u8 (uint8x8_t __a)
4153 return (uint32x2_t) __a;
4156 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4157 vreinterpret_u32_u16 (uint16x4_t __a)
4159 return (uint32x2_t) __a;
4162 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4163 vreinterpret_u32_u64 (uint64x1_t __a)
4165 return (uint32x2_t) __a;
4168 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4169 vreinterpret_u32_p8 (poly8x8_t __a)
4171 return (uint32x2_t) __a;
4174 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4175 vreinterpret_u32_p16 (poly16x4_t __a)
4177 return (uint32x2_t) __a;
4180 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4181 vreinterpretq_u32_f64 (float64x2_t __a)
4183 return (uint32x4_t) __a;
4186 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4187 vreinterpretq_u32_s8 (int8x16_t __a)
4189 return (uint32x4_t) __a;
4192 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4193 vreinterpretq_u32_s16 (int16x8_t __a)
4195 return (uint32x4_t) __a;
4198 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4199 vreinterpretq_u32_s32 (int32x4_t __a)
4201 return (uint32x4_t) __a;
4204 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4205 vreinterpretq_u32_s64 (int64x2_t __a)
4207 return (uint32x4_t) __a;
4210 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4211 vreinterpretq_u32_f32 (float32x4_t __a)
4213 return (uint32x4_t) __a;
4216 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4217 vreinterpretq_u32_u8 (uint8x16_t __a)
4219 return (uint32x4_t) __a;
4222 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4223 vreinterpretq_u32_u16 (uint16x8_t __a)
4225 return (uint32x4_t) __a;
4228 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4229 vreinterpretq_u32_u64 (uint64x2_t __a)
4231 return (uint32x4_t) __a;
4234 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4235 vreinterpretq_u32_p8 (poly8x16_t __a)
4237 return (uint32x4_t) __a;
4240 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4241 vreinterpretq_u32_p16 (poly16x8_t __a)
4243 return (uint32x4_t) __a;
/* Helper used by the vget_low_* intrinsics below: reinterpret the 128-bit
   argument as u64x2, extract lane 0 into a u64x1, and reinterpret back to
   the requested element type.  Expands to the body of the caller; it is
   #undef'd once the vget_low_* definitions are done.  */
#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));  \
  return vreinterpret_##__TYPE##_u64 (lo);
4251 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4252 vget_low_f32 (float32x4_t __a)
4254 __GET_LOW (f32);
4257 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4258 vget_low_f64 (float64x2_t __a)
4260 return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
4263 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4264 vget_low_p8 (poly8x16_t __a)
4266 __GET_LOW (p8);
4269 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4270 vget_low_p16 (poly16x8_t __a)
4272 __GET_LOW (p16);
4275 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4276 vget_low_s8 (int8x16_t __a)
4278 __GET_LOW (s8);
4281 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4282 vget_low_s16 (int16x8_t __a)
4284 __GET_LOW (s16);
4287 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4288 vget_low_s32 (int32x4_t __a)
4290 __GET_LOW (s32);
4293 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4294 vget_low_s64 (int64x2_t __a)
4296 __GET_LOW (s64);
4299 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4300 vget_low_u8 (uint8x16_t __a)
4302 __GET_LOW (u8);
4305 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4306 vget_low_u16 (uint16x8_t __a)
4308 __GET_LOW (u16);
4311 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4312 vget_low_u32 (uint32x4_t __a)
4314 __GET_LOW (u32);
4317 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4318 vget_low_u64 (uint64x2_t __a)
4320 return vcreate_u64 (vgetq_lane_u64 (__a, 0));
4323 #undef __GET_LOW
4325 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4326 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4328 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4331 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4332 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4334 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4337 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4338 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4340 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4343 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4344 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4346 return __builtin_aarch64_combinedi (__a[0], __b[0]);
4349 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4350 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4352 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4355 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4356 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4358 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4359 (int8x8_t) __b);
4362 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4363 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4365 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4366 (int16x4_t) __b);
4369 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4370 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4372 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4373 (int32x2_t) __b);
4376 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4377 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4379 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
4382 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4383 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4385 return __builtin_aarch64_combinedf (__a[0], __b[0]);
4388 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4389 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4391 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4392 (int8x8_t) __b);
4395 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4396 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4398 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4399 (int16x4_t) __b);
4402 /* Start of temporary inline asm implementations. */
4404 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4405 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4407 int8x8_t result;
4408 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4409 : "=w"(result)
4410 : "0"(a), "w"(b), "w"(c)
4411 : /* No clobbers */);
4412 return result;
4415 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4416 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4418 int16x4_t result;
4419 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4420 : "=w"(result)
4421 : "0"(a), "w"(b), "w"(c)
4422 : /* No clobbers */);
4423 return result;
4426 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4427 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4429 int32x2_t result;
4430 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4431 : "=w"(result)
4432 : "0"(a), "w"(b), "w"(c)
4433 : /* No clobbers */);
4434 return result;
4437 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4438 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4440 uint8x8_t result;
4441 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4442 : "=w"(result)
4443 : "0"(a), "w"(b), "w"(c)
4444 : /* No clobbers */);
4445 return result;
4448 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4449 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4451 uint16x4_t result;
4452 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4453 : "=w"(result)
4454 : "0"(a), "w"(b), "w"(c)
4455 : /* No clobbers */);
4456 return result;
4459 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4460 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4462 uint32x2_t result;
4463 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4464 : "=w"(result)
4465 : "0"(a), "w"(b), "w"(c)
4466 : /* No clobbers */);
4467 return result;
4470 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4471 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4473 int16x8_t result;
4474 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4475 : "=w"(result)
4476 : "0"(a), "w"(b), "w"(c)
4477 : /* No clobbers */);
4478 return result;
4481 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4482 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4484 int32x4_t result;
4485 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4486 : "=w"(result)
4487 : "0"(a), "w"(b), "w"(c)
4488 : /* No clobbers */);
4489 return result;
4492 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4493 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4495 int64x2_t result;
4496 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4497 : "=w"(result)
4498 : "0"(a), "w"(b), "w"(c)
4499 : /* No clobbers */);
4500 return result;
4503 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4504 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4506 uint16x8_t result;
4507 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4508 : "=w"(result)
4509 : "0"(a), "w"(b), "w"(c)
4510 : /* No clobbers */);
4511 return result;
4514 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4515 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4517 uint32x4_t result;
4518 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4519 : "=w"(result)
4520 : "0"(a), "w"(b), "w"(c)
4521 : /* No clobbers */);
4522 return result;
4525 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4526 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4528 uint64x2_t result;
4529 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4530 : "=w"(result)
4531 : "0"(a), "w"(b), "w"(c)
4532 : /* No clobbers */);
4533 return result;
4536 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4537 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4539 int16x8_t result;
4540 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4541 : "=w"(result)
4542 : "0"(a), "w"(b), "w"(c)
4543 : /* No clobbers */);
4544 return result;
4547 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4548 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4550 int32x4_t result;
4551 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4552 : "=w"(result)
4553 : "0"(a), "w"(b), "w"(c)
4554 : /* No clobbers */);
4555 return result;
4558 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4559 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4561 int64x2_t result;
4562 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4563 : "=w"(result)
4564 : "0"(a), "w"(b), "w"(c)
4565 : /* No clobbers */);
4566 return result;
4569 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4570 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4572 uint16x8_t result;
4573 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4574 : "=w"(result)
4575 : "0"(a), "w"(b), "w"(c)
4576 : /* No clobbers */);
4577 return result;
4580 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4581 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4583 uint32x4_t result;
4584 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4585 : "=w"(result)
4586 : "0"(a), "w"(b), "w"(c)
4587 : /* No clobbers */);
4588 return result;
4591 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4592 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4594 uint64x2_t result;
4595 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4596 : "=w"(result)
4597 : "0"(a), "w"(b), "w"(c)
4598 : /* No clobbers */);
4599 return result;
4602 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4603 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4605 int8x16_t result;
4606 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4607 : "=w"(result)
4608 : "0"(a), "w"(b), "w"(c)
4609 : /* No clobbers */);
4610 return result;
4613 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4614 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4616 int16x8_t result;
4617 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4618 : "=w"(result)
4619 : "0"(a), "w"(b), "w"(c)
4620 : /* No clobbers */);
4621 return result;
4624 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4625 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4627 int32x4_t result;
4628 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4629 : "=w"(result)
4630 : "0"(a), "w"(b), "w"(c)
4631 : /* No clobbers */);
4632 return result;
4635 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4636 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4638 uint8x16_t result;
4639 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4640 : "=w"(result)
4641 : "0"(a), "w"(b), "w"(c)
4642 : /* No clobbers */);
4643 return result;
4646 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4647 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4649 uint16x8_t result;
4650 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4651 : "=w"(result)
4652 : "0"(a), "w"(b), "w"(c)
4653 : /* No clobbers */);
4654 return result;
4657 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4658 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4660 uint32x4_t result;
4661 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4662 : "=w"(result)
4663 : "0"(a), "w"(b), "w"(c)
4664 : /* No clobbers */);
4665 return result;
4668 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4669 vabd_f32 (float32x2_t a, float32x2_t b)
4671 float32x2_t result;
4672 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4673 : "=w"(result)
4674 : "w"(a), "w"(b)
4675 : /* No clobbers */);
4676 return result;
4679 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4680 vabd_s8 (int8x8_t a, int8x8_t b)
4682 int8x8_t result;
4683 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4684 : "=w"(result)
4685 : "w"(a), "w"(b)
4686 : /* No clobbers */);
4687 return result;
4690 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4691 vabd_s16 (int16x4_t a, int16x4_t b)
4693 int16x4_t result;
4694 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4695 : "=w"(result)
4696 : "w"(a), "w"(b)
4697 : /* No clobbers */);
4698 return result;
4701 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4702 vabd_s32 (int32x2_t a, int32x2_t b)
4704 int32x2_t result;
4705 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4706 : "=w"(result)
4707 : "w"(a), "w"(b)
4708 : /* No clobbers */);
4709 return result;
4712 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4713 vabd_u8 (uint8x8_t a, uint8x8_t b)
4715 uint8x8_t result;
4716 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4717 : "=w"(result)
4718 : "w"(a), "w"(b)
4719 : /* No clobbers */);
4720 return result;
4723 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4724 vabd_u16 (uint16x4_t a, uint16x4_t b)
4726 uint16x4_t result;
4727 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4728 : "=w"(result)
4729 : "w"(a), "w"(b)
4730 : /* No clobbers */);
4731 return result;
4734 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4735 vabd_u32 (uint32x2_t a, uint32x2_t b)
4737 uint32x2_t result;
4738 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4739 : "=w"(result)
4740 : "w"(a), "w"(b)
4741 : /* No clobbers */);
4742 return result;
4745 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4746 vabdd_f64 (float64_t a, float64_t b)
4748 float64_t result;
4749 __asm__ ("fabd %d0, %d1, %d2"
4750 : "=w"(result)
4751 : "w"(a), "w"(b)
4752 : /* No clobbers */);
4753 return result;
4756 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4757 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4759 int16x8_t result;
4760 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4761 : "=w"(result)
4762 : "w"(a), "w"(b)
4763 : /* No clobbers */);
4764 return result;
4767 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4768 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4770 int32x4_t result;
4771 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4772 : "=w"(result)
4773 : "w"(a), "w"(b)
4774 : /* No clobbers */);
4775 return result;
4778 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4779 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4781 int64x2_t result;
4782 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4783 : "=w"(result)
4784 : "w"(a), "w"(b)
4785 : /* No clobbers */);
4786 return result;
4789 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4790 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4792 uint16x8_t result;
4793 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4794 : "=w"(result)
4795 : "w"(a), "w"(b)
4796 : /* No clobbers */);
4797 return result;
4800 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4801 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4803 uint32x4_t result;
4804 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4805 : "=w"(result)
4806 : "w"(a), "w"(b)
4807 : /* No clobbers */);
4808 return result;
4811 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4812 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4814 uint64x2_t result;
4815 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4816 : "=w"(result)
4817 : "w"(a), "w"(b)
4818 : /* No clobbers */);
4819 return result;
4822 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4823 vabdl_s8 (int8x8_t a, int8x8_t b)
4825 int16x8_t result;
4826 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4827 : "=w"(result)
4828 : "w"(a), "w"(b)
4829 : /* No clobbers */);
4830 return result;
4833 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4834 vabdl_s16 (int16x4_t a, int16x4_t b)
4836 int32x4_t result;
4837 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4838 : "=w"(result)
4839 : "w"(a), "w"(b)
4840 : /* No clobbers */);
4841 return result;
4844 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4845 vabdl_s32 (int32x2_t a, int32x2_t b)
4847 int64x2_t result;
4848 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4849 : "=w"(result)
4850 : "w"(a), "w"(b)
4851 : /* No clobbers */);
4852 return result;
4855 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4856 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4858 uint16x8_t result;
4859 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4860 : "=w"(result)
4861 : "w"(a), "w"(b)
4862 : /* No clobbers */);
4863 return result;
4866 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4867 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4869 uint32x4_t result;
4870 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4871 : "=w"(result)
4872 : "w"(a), "w"(b)
4873 : /* No clobbers */);
4874 return result;
4877 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4878 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4880 uint64x2_t result;
4881 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4882 : "=w"(result)
4883 : "w"(a), "w"(b)
4884 : /* No clobbers */);
4885 return result;
4888 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4889 vabdq_f32 (float32x4_t a, float32x4_t b)
4891 float32x4_t result;
4892 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4893 : "=w"(result)
4894 : "w"(a), "w"(b)
4895 : /* No clobbers */);
4896 return result;
4899 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4900 vabdq_f64 (float64x2_t a, float64x2_t b)
4902 float64x2_t result;
4903 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4904 : "=w"(result)
4905 : "w"(a), "w"(b)
4906 : /* No clobbers */);
4907 return result;
4910 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4911 vabdq_s8 (int8x16_t a, int8x16_t b)
4913 int8x16_t result;
4914 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4915 : "=w"(result)
4916 : "w"(a), "w"(b)
4917 : /* No clobbers */);
4918 return result;
4921 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4922 vabdq_s16 (int16x8_t a, int16x8_t b)
4924 int16x8_t result;
4925 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4926 : "=w"(result)
4927 : "w"(a), "w"(b)
4928 : /* No clobbers */);
4929 return result;
4932 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4933 vabdq_s32 (int32x4_t a, int32x4_t b)
4935 int32x4_t result;
4936 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4937 : "=w"(result)
4938 : "w"(a), "w"(b)
4939 : /* No clobbers */);
4940 return result;
4943 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4944 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4946 uint8x16_t result;
4947 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4948 : "=w"(result)
4949 : "w"(a), "w"(b)
4950 : /* No clobbers */);
4951 return result;
4954 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4955 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4957 uint16x8_t result;
4958 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4959 : "=w"(result)
4960 : "w"(a), "w"(b)
4961 : /* No clobbers */);
4962 return result;
4965 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4966 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4968 uint32x4_t result;
4969 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4970 : "=w"(result)
4971 : "w"(a), "w"(b)
4972 : /* No clobbers */);
4973 return result;
4976 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4977 vabds_f32 (float32_t a, float32_t b)
4979 float32_t result;
4980 __asm__ ("fabd %s0, %s1, %s2"
4981 : "=w"(result)
4982 : "w"(a), "w"(b)
4983 : /* No clobbers */);
4984 return result;
4987 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4988 vaddlv_s8 (int8x8_t a)
4990 int16_t result;
4991 __asm__ ("saddlv %h0,%1.8b"
4992 : "=w"(result)
4993 : "w"(a)
4994 : /* No clobbers */);
4995 return result;
4998 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4999 vaddlv_s16 (int16x4_t a)
5001 int32_t result;
5002 __asm__ ("saddlv %s0,%1.4h"
5003 : "=w"(result)
5004 : "w"(a)
5005 : /* No clobbers */);
5006 return result;
5009 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5010 vaddlv_u8 (uint8x8_t a)
5012 uint16_t result;
5013 __asm__ ("uaddlv %h0,%1.8b"
5014 : "=w"(result)
5015 : "w"(a)
5016 : /* No clobbers */);
5017 return result;
5020 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5021 vaddlv_u16 (uint16x4_t a)
5023 uint32_t result;
5024 __asm__ ("uaddlv %s0,%1.4h"
5025 : "=w"(result)
5026 : "w"(a)
5027 : /* No clobbers */);
5028 return result;
5031 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5032 vaddlvq_s8 (int8x16_t a)
5034 int16_t result;
5035 __asm__ ("saddlv %h0,%1.16b"
5036 : "=w"(result)
5037 : "w"(a)
5038 : /* No clobbers */);
5039 return result;
5042 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5043 vaddlvq_s16 (int16x8_t a)
5045 int32_t result;
5046 __asm__ ("saddlv %s0,%1.8h"
5047 : "=w"(result)
5048 : "w"(a)
5049 : /* No clobbers */);
5050 return result;
5053 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5054 vaddlvq_s32 (int32x4_t a)
5056 int64_t result;
5057 __asm__ ("saddlv %d0,%1.4s"
5058 : "=w"(result)
5059 : "w"(a)
5060 : /* No clobbers */);
5061 return result;
5064 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5065 vaddlvq_u8 (uint8x16_t a)
5067 uint16_t result;
5068 __asm__ ("uaddlv %h0,%1.16b"
5069 : "=w"(result)
5070 : "w"(a)
5071 : /* No clobbers */);
5072 return result;
5075 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5076 vaddlvq_u16 (uint16x8_t a)
5078 uint32_t result;
5079 __asm__ ("uaddlv %s0,%1.8h"
5080 : "=w"(result)
5081 : "w"(a)
5082 : /* No clobbers */);
5083 return result;
5086 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5087 vaddlvq_u32 (uint32x4_t a)
5089 uint64_t result;
5090 __asm__ ("uaddlv %d0,%1.4s"
5091 : "=w"(result)
5092 : "w"(a)
5093 : /* No clobbers */);
5094 return result;
5097 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5098 vcls_s8 (int8x8_t a)
5100 int8x8_t result;
5101 __asm__ ("cls %0.8b,%1.8b"
5102 : "=w"(result)
5103 : "w"(a)
5104 : /* No clobbers */);
5105 return result;
5108 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5109 vcls_s16 (int16x4_t a)
5111 int16x4_t result;
5112 __asm__ ("cls %0.4h,%1.4h"
5113 : "=w"(result)
5114 : "w"(a)
5115 : /* No clobbers */);
5116 return result;
5119 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5120 vcls_s32 (int32x2_t a)
5122 int32x2_t result;
5123 __asm__ ("cls %0.2s,%1.2s"
5124 : "=w"(result)
5125 : "w"(a)
5126 : /* No clobbers */);
5127 return result;
5130 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5131 vclsq_s8 (int8x16_t a)
5133 int8x16_t result;
5134 __asm__ ("cls %0.16b,%1.16b"
5135 : "=w"(result)
5136 : "w"(a)
5137 : /* No clobbers */);
5138 return result;
5141 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5142 vclsq_s16 (int16x8_t a)
5144 int16x8_t result;
5145 __asm__ ("cls %0.8h,%1.8h"
5146 : "=w"(result)
5147 : "w"(a)
5148 : /* No clobbers */);
5149 return result;
5152 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5153 vclsq_s32 (int32x4_t a)
5155 int32x4_t result;
5156 __asm__ ("cls %0.4s,%1.4s"
5157 : "=w"(result)
5158 : "w"(a)
5159 : /* No clobbers */);
5160 return result;
5163 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5164 vcnt_p8 (poly8x8_t a)
5166 poly8x8_t result;
5167 __asm__ ("cnt %0.8b,%1.8b"
5168 : "=w"(result)
5169 : "w"(a)
5170 : /* No clobbers */);
5171 return result;
5174 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5175 vcnt_s8 (int8x8_t a)
5177 int8x8_t result;
5178 __asm__ ("cnt %0.8b,%1.8b"
5179 : "=w"(result)
5180 : "w"(a)
5181 : /* No clobbers */);
5182 return result;
5185 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5186 vcnt_u8 (uint8x8_t a)
5188 uint8x8_t result;
5189 __asm__ ("cnt %0.8b,%1.8b"
5190 : "=w"(result)
5191 : "w"(a)
5192 : /* No clobbers */);
5193 return result;
5196 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5197 vcntq_p8 (poly8x16_t a)
5199 poly8x16_t result;
5200 __asm__ ("cnt %0.16b,%1.16b"
5201 : "=w"(result)
5202 : "w"(a)
5203 : /* No clobbers */);
5204 return result;
5207 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5208 vcntq_s8 (int8x16_t a)
5210 int8x16_t result;
5211 __asm__ ("cnt %0.16b,%1.16b"
5212 : "=w"(result)
5213 : "w"(a)
5214 : /* No clobbers */);
5215 return result;
5218 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5219 vcntq_u8 (uint8x16_t a)
5221 uint8x16_t result;
5222 __asm__ ("cnt %0.16b,%1.16b"
5223 : "=w"(result)
5224 : "w"(a)
5225 : /* No clobbers */);
5226 return result;
/* vcopyq_lane_* : insert lane D of vector C into lane B of vector A
   (INS).  Implemented as macros because B and D must be immediates
   ("i" constraints).  A is tied to the output via "0".  */

#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t c_ = (c);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t c_ = (c);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t c_ = (c);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t c_ = (c);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t c_ = (c);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t c_ = (c);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t c_ = (c);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvt_f16_f32 not supported */

/* vcvt_f32_f16 not supported */

/* vcvt_high_f16_f32 not supported */

/* vcvt_high_f32_f16 not supported */
5393 static float32x2_t vdup_n_f32 (float32_t);
5395 #define vcvt_n_f32_s32(a, b) \
5396 __extension__ \
5397 ({ \
5398 int32x2_t a_ = (a); \
5399 float32x2_t result; \
5400 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5401 : "=w"(result) \
5402 : "w"(a_), "i"(b) \
5403 : /* No clobbers */); \
5404 result; \
5407 #define vcvt_n_f32_u32(a, b) \
5408 __extension__ \
5409 ({ \
5410 uint32x2_t a_ = (a); \
5411 float32x2_t result; \
5412 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5413 : "=w"(result) \
5414 : "w"(a_), "i"(b) \
5415 : /* No clobbers */); \
5416 result; \
5419 #define vcvt_n_s32_f32(a, b) \
5420 __extension__ \
5421 ({ \
5422 float32x2_t a_ = (a); \
5423 int32x2_t result; \
5424 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
5425 : "=w"(result) \
5426 : "w"(a_), "i"(b) \
5427 : /* No clobbers */); \
5428 result; \
5431 #define vcvt_n_u32_f32(a, b) \
5432 __extension__ \
5433 ({ \
5434 float32x2_t a_ = (a); \
5435 uint32x2_t result; \
5436 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
5437 : "=w"(result) \
5438 : "w"(a_), "i"(b) \
5439 : /* No clobbers */); \
5440 result; \
/* vcvtd_n_* : scalar 64-bit fixed-point <-> double conversions with B
   fraction bits, operating on D registers.  */

#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t a_ = (a);                                                \
       float64_t result;                                                \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t a_ = (a);                                               \
       float64_t result;                                                \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       int64_t result;                                                  \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       uint64_t result;                                                 \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvtq_n_* : 128-bit fixed-point <-> float conversions with B
   fraction bits.  */

#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       float32x4_t result;                                              \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       float64x2_t result;                                              \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       int32x4_t result;                                                \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       int64x2_t result;                                                \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       uint32x4_t result;                                               \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       uint64x2_t result;                                               \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvts_n_* : scalar 32-bit fixed-point <-> float conversions with B
   fraction bits, operating on S registers.  */

#define vcvts_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32_t a_ = (a);                                                \
       float32_t result;                                                \
       __asm__ ("scvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32_t a_ = (a);                                               \
       float32_t result;                                                \
       __asm__ ("ucvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       int32_t result;                                                  \
       __asm__ ("fcvtzs %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       uint32_t result;                                                 \
       __asm__ ("fcvtzu %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5635 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5636 vcvtx_f32_f64 (float64x2_t a)
5638 float32x2_t result;
5639 __asm__ ("fcvtxn %0.2s,%1.2d"
5640 : "=w"(result)
5641 : "w"(a)
5642 : /* No clobbers */);
5643 return result;
5646 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5647 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5649 float32x4_t result;
5650 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5651 : "=w"(result)
5652 : "w" (b), "0"(a)
5653 : /* No clobbers */);
5654 return result;
5657 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5658 vcvtxd_f32_f64 (float64_t a)
5660 float32_t result;
5661 __asm__ ("fcvtxn %s0,%d1"
5662 : "=w"(result)
5663 : "w"(a)
5664 : /* No clobbers */);
5665 return result;
5668 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5669 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5671 float32x2_t result;
5672 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5673 : "=w"(result)
5674 : "0"(a), "w"(b), "w"(c)
5675 : /* No clobbers */);
5676 return result;
5679 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5680 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5682 float32x4_t result;
5683 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5684 : "=w"(result)
5685 : "0"(a), "w"(b), "w"(c)
5686 : /* No clobbers */);
5687 return result;
5690 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5691 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5693 float64x2_t result;
5694 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
5695 : "=w"(result)
5696 : "0"(a), "w"(b), "w"(c)
5697 : /* No clobbers */);
5698 return result;
5701 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5702 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5704 float32x2_t result;
5705 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
5706 : "=w"(result)
5707 : "0"(a), "w"(b), "w"(c)
5708 : /* No clobbers */);
5709 return result;
5712 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5713 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
5715 float32x4_t result;
5716 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
5717 : "=w"(result)
5718 : "0"(a), "w"(b), "w"(c)
5719 : /* No clobbers */);
5720 return result;
5723 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5724 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
5726 float64x2_t result;
5727 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
5728 : "=w"(result)
5729 : "0"(a), "w"(b), "w"(c)
5730 : /* No clobbers */);
5731 return result;
5734 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5735 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5737 float32x2_t result;
5738 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
5739 : "=w"(result)
5740 : "0"(a), "w"(b), "w"(c)
5741 : /* No clobbers */);
5742 return result;
5745 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5746 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5748 float32x4_t result;
5749 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
5750 : "=w"(result)
5751 : "0"(a), "w"(b), "w"(c)
5752 : /* No clobbers */);
5753 return result;
5756 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5757 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5759 float64x2_t result;
5760 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
5761 : "=w"(result)
5762 : "0"(a), "w"(b), "w"(c)
5763 : /* No clobbers */);
5764 return result;
5767 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5768 vget_high_f32 (float32x4_t a)
5770 float32x2_t result;
5771 __asm__ ("ins %0.d[0], %1.d[1]"
5772 : "=w"(result)
5773 : "w"(a)
5774 : /* No clobbers */);
5775 return result;
5778 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5779 vget_high_f64 (float64x2_t a)
5781 float64x1_t result;
5782 __asm__ ("ins %0.d[0], %1.d[1]"
5783 : "=w"(result)
5784 : "w"(a)
5785 : /* No clobbers */);
5786 return result;
5789 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5790 vget_high_p8 (poly8x16_t a)
5792 poly8x8_t result;
5793 __asm__ ("ins %0.d[0], %1.d[1]"
5794 : "=w"(result)
5795 : "w"(a)
5796 : /* No clobbers */);
5797 return result;
5800 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5801 vget_high_p16 (poly16x8_t a)
5803 poly16x4_t result;
5804 __asm__ ("ins %0.d[0], %1.d[1]"
5805 : "=w"(result)
5806 : "w"(a)
5807 : /* No clobbers */);
5808 return result;
5811 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5812 vget_high_s8 (int8x16_t a)
5814 int8x8_t result;
5815 __asm__ ("ins %0.d[0], %1.d[1]"
5816 : "=w"(result)
5817 : "w"(a)
5818 : /* No clobbers */);
5819 return result;
5822 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5823 vget_high_s16 (int16x8_t a)
5825 int16x4_t result;
5826 __asm__ ("ins %0.d[0], %1.d[1]"
5827 : "=w"(result)
5828 : "w"(a)
5829 : /* No clobbers */);
5830 return result;
5833 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5834 vget_high_s32 (int32x4_t a)
5836 int32x2_t result;
5837 __asm__ ("ins %0.d[0], %1.d[1]"
5838 : "=w"(result)
5839 : "w"(a)
5840 : /* No clobbers */);
5841 return result;
5844 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5845 vget_high_s64 (int64x2_t a)
5847 int64x1_t result;
5848 __asm__ ("ins %0.d[0], %1.d[1]"
5849 : "=w"(result)
5850 : "w"(a)
5851 : /* No clobbers */);
5852 return result;
5855 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5856 vget_high_u8 (uint8x16_t a)
5858 uint8x8_t result;
5859 __asm__ ("ins %0.d[0], %1.d[1]"
5860 : "=w"(result)
5861 : "w"(a)
5862 : /* No clobbers */);
5863 return result;
5866 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5867 vget_high_u16 (uint16x8_t a)
5869 uint16x4_t result;
5870 __asm__ ("ins %0.d[0], %1.d[1]"
5871 : "=w"(result)
5872 : "w"(a)
5873 : /* No clobbers */);
5874 return result;
5877 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5878 vget_high_u32 (uint32x4_t a)
5880 uint32x2_t result;
5881 __asm__ ("ins %0.d[0], %1.d[1]"
5882 : "=w"(result)
5883 : "w"(a)
5884 : /* No clobbers */);
5885 return result;
5888 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5889 vget_high_u64 (uint64x2_t a)
5891 uint64x1_t result;
5892 __asm__ ("ins %0.d[0], %1.d[1]"
5893 : "=w"(result)
5894 : "w"(a)
5895 : /* No clobbers */);
5896 return result;
5899 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5900 vhsub_s8 (int8x8_t a, int8x8_t b)
5902 int8x8_t result;
5903 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
5904 : "=w"(result)
5905 : "w"(a), "w"(b)
5906 : /* No clobbers */);
5907 return result;
5910 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5911 vhsub_s16 (int16x4_t a, int16x4_t b)
5913 int16x4_t result;
5914 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
5915 : "=w"(result)
5916 : "w"(a), "w"(b)
5917 : /* No clobbers */);
5918 return result;
5921 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5922 vhsub_s32 (int32x2_t a, int32x2_t b)
5924 int32x2_t result;
5925 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
5926 : "=w"(result)
5927 : "w"(a), "w"(b)
5928 : /* No clobbers */);
5929 return result;
5932 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5933 vhsub_u8 (uint8x8_t a, uint8x8_t b)
5935 uint8x8_t result;
5936 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
5937 : "=w"(result)
5938 : "w"(a), "w"(b)
5939 : /* No clobbers */);
5940 return result;
5943 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5944 vhsub_u16 (uint16x4_t a, uint16x4_t b)
5946 uint16x4_t result;
5947 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
5948 : "=w"(result)
5949 : "w"(a), "w"(b)
5950 : /* No clobbers */);
5951 return result;
5954 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5955 vhsub_u32 (uint32x2_t a, uint32x2_t b)
5957 uint32x2_t result;
5958 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
5959 : "=w"(result)
5960 : "w"(a), "w"(b)
5961 : /* No clobbers */);
5962 return result;
5965 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5966 vhsubq_s8 (int8x16_t a, int8x16_t b)
5968 int8x16_t result;
5969 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
5970 : "=w"(result)
5971 : "w"(a), "w"(b)
5972 : /* No clobbers */);
5973 return result;
5976 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5977 vhsubq_s16 (int16x8_t a, int16x8_t b)
5979 int16x8_t result;
5980 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
5981 : "=w"(result)
5982 : "w"(a), "w"(b)
5983 : /* No clobbers */);
5984 return result;
5987 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5988 vhsubq_s32 (int32x4_t a, int32x4_t b)
5990 int32x4_t result;
5991 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
5992 : "=w"(result)
5993 : "w"(a), "w"(b)
5994 : /* No clobbers */);
5995 return result;
5998 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5999 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6001 uint8x16_t result;
6002 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6003 : "=w"(result)
6004 : "w"(a), "w"(b)
6005 : /* No clobbers */);
6006 return result;
6009 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6010 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6012 uint16x8_t result;
6013 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6014 : "=w"(result)
6015 : "w"(a), "w"(b)
6016 : /* No clobbers */);
6017 return result;
6020 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6021 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6023 uint32x4_t result;
6024 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6025 : "=w"(result)
6026 : "w"(a), "w"(b)
6027 : /* No clobbers */);
6028 return result;
6031 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6032 vld1_dup_f32 (const float32_t * a)
6034 float32x2_t result;
6035 __asm__ ("ld1r {%0.2s}, %1"
6036 : "=w"(result)
6037 : "Utv"(*a)
6038 : /* No clobbers */);
6039 return result;
6042 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6043 vld1_dup_f64 (const float64_t * a)
6045 float64x1_t result;
6046 __asm__ ("ld1r {%0.1d}, %1"
6047 : "=w"(result)
6048 : "Utv"(*a)
6049 : /* No clobbers */);
6050 return result;
6053 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6054 vld1_dup_p8 (const poly8_t * a)
6056 poly8x8_t result;
6057 __asm__ ("ld1r {%0.8b}, %1"
6058 : "=w"(result)
6059 : "Utv"(*a)
6060 : /* No clobbers */);
6061 return result;
6064 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6065 vld1_dup_p16 (const poly16_t * a)
6067 poly16x4_t result;
6068 __asm__ ("ld1r {%0.4h}, %1"
6069 : "=w"(result)
6070 : "Utv"(*a)
6071 : /* No clobbers */);
6072 return result;
6075 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6076 vld1_dup_s8 (const int8_t * a)
6078 int8x8_t result;
6079 __asm__ ("ld1r {%0.8b}, %1"
6080 : "=w"(result)
6081 : "Utv"(*a)
6082 : /* No clobbers */);
6083 return result;
6086 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6087 vld1_dup_s16 (const int16_t * a)
6089 int16x4_t result;
6090 __asm__ ("ld1r {%0.4h}, %1"
6091 : "=w"(result)
6092 : "Utv"(*a)
6093 : /* No clobbers */);
6094 return result;
6097 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6098 vld1_dup_s32 (const int32_t * a)
6100 int32x2_t result;
6101 __asm__ ("ld1r {%0.2s}, %1"
6102 : "=w"(result)
6103 : "Utv"(*a)
6104 : /* No clobbers */);
6105 return result;
6108 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6109 vld1_dup_s64 (const int64_t * a)
6111 int64x1_t result;
6112 __asm__ ("ld1r {%0.1d}, %1"
6113 : "=w"(result)
6114 : "Utv"(*a)
6115 : /* No clobbers */);
6116 return result;
6119 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6120 vld1_dup_u8 (const uint8_t * a)
6122 uint8x8_t result;
6123 __asm__ ("ld1r {%0.8b}, %1"
6124 : "=w"(result)
6125 : "Utv"(*a)
6126 : /* No clobbers */);
6127 return result;
6130 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6131 vld1_dup_u16 (const uint16_t * a)
6133 uint16x4_t result;
6134 __asm__ ("ld1r {%0.4h}, %1"
6135 : "=w"(result)
6136 : "Utv"(*a)
6137 : /* No clobbers */);
6138 return result;
6141 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6142 vld1_dup_u32 (const uint32_t * a)
6144 uint32x2_t result;
6145 __asm__ ("ld1r {%0.2s}, %1"
6146 : "=w"(result)
6147 : "Utv"(*a)
6148 : /* No clobbers */);
6149 return result;
6152 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6153 vld1_dup_u64 (const uint64_t * a)
6155 uint64x1_t result;
6156 __asm__ ("ld1r {%0.1d}, %1"
6157 : "=w"(result)
6158 : "Utv"(*a)
6159 : /* No clobbers */);
6160 return result;
/* vld1_lane: load one scalar from memory into lane C of vector B,
   returning the updated 64-bit vector.  Implemented as statement-
   expression macros because the lane number must be a compile-time
   immediate ("i" constraint); "0"(b_) ties the input vector to the
   output register so the other lanes are preserved.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6319 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6320 vld1q_dup_f32 (const float32_t * a)
6322 float32x4_t result;
6323 __asm__ ("ld1r {%0.4s}, %1"
6324 : "=w"(result)
6325 : "Utv"(*a)
6326 : /* No clobbers */);
6327 return result;
6330 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6331 vld1q_dup_f64 (const float64_t * a)
6333 float64x2_t result;
6334 __asm__ ("ld1r {%0.2d}, %1"
6335 : "=w"(result)
6336 : "Utv"(*a)
6337 : /* No clobbers */);
6338 return result;
6341 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6342 vld1q_dup_p8 (const poly8_t * a)
6344 poly8x16_t result;
6345 __asm__ ("ld1r {%0.16b}, %1"
6346 : "=w"(result)
6347 : "Utv"(*a)
6348 : /* No clobbers */);
6349 return result;
6352 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6353 vld1q_dup_p16 (const poly16_t * a)
6355 poly16x8_t result;
6356 __asm__ ("ld1r {%0.8h}, %1"
6357 : "=w"(result)
6358 : "Utv"(*a)
6359 : /* No clobbers */);
6360 return result;
6363 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6364 vld1q_dup_s8 (const int8_t * a)
6366 int8x16_t result;
6367 __asm__ ("ld1r {%0.16b}, %1"
6368 : "=w"(result)
6369 : "Utv"(*a)
6370 : /* No clobbers */);
6371 return result;
6374 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6375 vld1q_dup_s16 (const int16_t * a)
6377 int16x8_t result;
6378 __asm__ ("ld1r {%0.8h}, %1"
6379 : "=w"(result)
6380 : "Utv"(*a)
6381 : /* No clobbers */);
6382 return result;
6385 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6386 vld1q_dup_s32 (const int32_t * a)
6388 int32x4_t result;
6389 __asm__ ("ld1r {%0.4s}, %1"
6390 : "=w"(result)
6391 : "Utv"(*a)
6392 : /* No clobbers */);
6393 return result;
6396 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6397 vld1q_dup_s64 (const int64_t * a)
6399 int64x2_t result;
6400 __asm__ ("ld1r {%0.2d}, %1"
6401 : "=w"(result)
6402 : "Utv"(*a)
6403 : /* No clobbers */);
6404 return result;
6407 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6408 vld1q_dup_u8 (const uint8_t * a)
6410 uint8x16_t result;
6411 __asm__ ("ld1r {%0.16b}, %1"
6412 : "=w"(result)
6413 : "Utv"(*a)
6414 : /* No clobbers */);
6415 return result;
6418 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6419 vld1q_dup_u16 (const uint16_t * a)
6421 uint16x8_t result;
6422 __asm__ ("ld1r {%0.8h}, %1"
6423 : "=w"(result)
6424 : "Utv"(*a)
6425 : /* No clobbers */);
6426 return result;
6429 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6430 vld1q_dup_u32 (const uint32_t * a)
6432 uint32x4_t result;
6433 __asm__ ("ld1r {%0.4s}, %1"
6434 : "=w"(result)
6435 : "Utv"(*a)
6436 : /* No clobbers */);
6437 return result;
6440 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6441 vld1q_dup_u64 (const uint64_t * a)
6443 uint64x2_t result;
6444 __asm__ ("ld1r {%0.2d}, %1"
6445 : "=w"(result)
6446 : "Utv"(*a)
6447 : /* No clobbers */);
6448 return result;
/* vld1q_lane: load one scalar from memory into lane C of 128-bit
   vector B, preserving the remaining lanes ("0"(b_) ties B to the
   output).  The lane number C must be a compile-time immediate.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6607 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6608 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6610 float32x2_t result;
6611 float32x2_t t1;
6612 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6613 : "=w"(result), "=w"(t1)
6614 : "0"(a), "w"(b), "w"(c)
6615 : /* No clobbers */);
6616 return result;
6619 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6620 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6622 int16x4_t result;
6623 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6624 : "=w"(result)
6625 : "0"(a), "w"(b), "x"(c)
6626 : /* No clobbers */);
6627 return result;
6630 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6631 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6633 int32x2_t result;
6634 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6635 : "=w"(result)
6636 : "0"(a), "w"(b), "w"(c)
6637 : /* No clobbers */);
6638 return result;
6641 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6642 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6644 uint16x4_t result;
6645 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6646 : "=w"(result)
6647 : "0"(a), "w"(b), "x"(c)
6648 : /* No clobbers */);
6649 return result;
6652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6653 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6655 uint32x2_t result;
6656 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6657 : "=w"(result)
6658 : "0"(a), "w"(b), "w"(c)
6659 : /* No clobbers */);
6660 return result;
6663 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6664 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6666 int8x8_t result;
6667 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6668 : "=w"(result)
6669 : "0"(a), "w"(b), "w"(c)
6670 : /* No clobbers */);
6671 return result;
6674 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6675 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6677 int16x4_t result;
6678 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6679 : "=w"(result)
6680 : "0"(a), "w"(b), "w"(c)
6681 : /* No clobbers */);
6682 return result;
6685 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6686 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6688 int32x2_t result;
6689 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6690 : "=w"(result)
6691 : "0"(a), "w"(b), "w"(c)
6692 : /* No clobbers */);
6693 return result;
6696 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6697 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6699 uint8x8_t result;
6700 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6701 : "=w"(result)
6702 : "0"(a), "w"(b), "w"(c)
6703 : /* No clobbers */);
6704 return result;
6707 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6708 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6710 uint16x4_t result;
6711 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6712 : "=w"(result)
6713 : "0"(a), "w"(b), "w"(c)
6714 : /* No clobbers */);
6715 return result;
6718 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6719 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6721 uint32x2_t result;
6722 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6723 : "=w"(result)
6724 : "0"(a), "w"(b), "w"(c)
6725 : /* No clobbers */);
6726 return result;
/* vmlal_high_lane(q): widening multiply-accumulate of the HIGH halves
   of B against lane D of C (SMLAL2/UMLAL2).  Macros because the lane
   index must be an immediate; _laneq variants take a 128-bit C.  */

#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6841 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6842 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6844 int32x4_t result;
6845 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6846 : "=w"(result)
6847 : "0"(a), "w"(b), "x"(c)
6848 : /* No clobbers */);
6849 return result;
6852 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6853 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6855 int64x2_t result;
6856 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6857 : "=w"(result)
6858 : "0"(a), "w"(b), "w"(c)
6859 : /* No clobbers */);
6860 return result;
6863 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6864 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6866 uint32x4_t result;
6867 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6868 : "=w"(result)
6869 : "0"(a), "w"(b), "x"(c)
6870 : /* No clobbers */);
6871 return result;
6874 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6875 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6877 uint64x2_t result;
6878 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6879 : "=w"(result)
6880 : "0"(a), "w"(b), "w"(c)
6881 : /* No clobbers */);
6882 return result;
6885 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6886 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6888 int16x8_t result;
6889 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6890 : "=w"(result)
6891 : "0"(a), "w"(b), "w"(c)
6892 : /* No clobbers */);
6893 return result;
6896 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6897 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6899 int32x4_t result;
6900 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6901 : "=w"(result)
6902 : "0"(a), "w"(b), "w"(c)
6903 : /* No clobbers */);
6904 return result;
6907 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6908 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6910 int64x2_t result;
6911 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6912 : "=w"(result)
6913 : "0"(a), "w"(b), "w"(c)
6914 : /* No clobbers */);
6915 return result;
6918 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6919 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6921 uint16x8_t result;
6922 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6923 : "=w"(result)
6924 : "0"(a), "w"(b), "w"(c)
6925 : /* No clobbers */);
6926 return result;
6929 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6930 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6932 uint32x4_t result;
6933 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6934 : "=w"(result)
6935 : "0"(a), "w"(b), "w"(c)
6936 : /* No clobbers */);
6937 return result;
6940 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6941 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6943 uint64x2_t result;
6944 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
6945 : "=w"(result)
6946 : "0"(a), "w"(b), "w"(c)
6947 : /* No clobbers */);
6948 return result;
/* vmlal_lane(q): widening multiply-accumulate of a 64-bit B against
   lane D of C (SMLAL/UMLAL).  Macros because the lane index must be a
   compile-time immediate; _laneq variants take a 128-bit C.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7063 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7064 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7066 int32x4_t result;
7067 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7068 : "=w"(result)
7069 : "0"(a), "w"(b), "x"(c)
7070 : /* No clobbers */);
7071 return result;
7074 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7075 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7077 int64x2_t result;
7078 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7079 : "=w"(result)
7080 : "0"(a), "w"(b), "w"(c)
7081 : /* No clobbers */);
7082 return result;
7085 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7086 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7088 uint32x4_t result;
7089 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7090 : "=w"(result)
7091 : "0"(a), "w"(b), "x"(c)
7092 : /* No clobbers */);
7093 return result;
7096 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7097 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7099 uint64x2_t result;
7100 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7101 : "=w"(result)
7102 : "0"(a), "w"(b), "w"(c)
7103 : /* No clobbers */);
7104 return result;
7107 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7108 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7110 int16x8_t result;
7111 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7112 : "=w"(result)
7113 : "0"(a), "w"(b), "w"(c)
7114 : /* No clobbers */);
7115 return result;
7118 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7119 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7121 int32x4_t result;
7122 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7123 : "=w"(result)
7124 : "0"(a), "w"(b), "w"(c)
7125 : /* No clobbers */);
7126 return result;
7129 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7130 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7132 int64x2_t result;
7133 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7134 : "=w"(result)
7135 : "0"(a), "w"(b), "w"(c)
7136 : /* No clobbers */);
7137 return result;
7140 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7141 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7143 uint16x8_t result;
7144 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7145 : "=w"(result)
7146 : "0"(a), "w"(b), "w"(c)
7147 : /* No clobbers */);
7148 return result;
7151 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7152 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7154 uint32x4_t result;
7155 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7156 : "=w"(result)
7157 : "0"(a), "w"(b), "w"(c)
7158 : /* No clobbers */);
7159 return result;
7162 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7163 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7165 uint64x2_t result;
7166 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7167 : "=w"(result)
7168 : "0"(a), "w"(b), "w"(c)
7169 : /* No clobbers */);
7170 return result;
7173 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7174 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7176 float32x4_t result;
7177 float32x4_t t1;
7178 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7179 : "=w"(result), "=w"(t1)
7180 : "0"(a), "w"(b), "w"(c)
7181 : /* No clobbers */);
7182 return result;
7185 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7186 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7188 int16x8_t result;
7189 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7190 : "=w"(result)
7191 : "0"(a), "w"(b), "x"(c)
7192 : /* No clobbers */);
7193 return result;
7196 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7197 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7199 int32x4_t result;
7200 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7201 : "=w"(result)
7202 : "0"(a), "w"(b), "w"(c)
7203 : /* No clobbers */);
7204 return result;
7207 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7208 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7210 uint16x8_t result;
7211 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7212 : "=w"(result)
7213 : "0"(a), "w"(b), "x"(c)
7214 : /* No clobbers */);
7215 return result;
7218 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7219 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7221 uint32x4_t result;
7222 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7223 : "=w"(result)
7224 : "0"(a), "w"(b), "w"(c)
7225 : /* No clobbers */);
7226 return result;
7229 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7230 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7232 int8x16_t result;
7233 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7234 : "=w"(result)
7235 : "0"(a), "w"(b), "w"(c)
7236 : /* No clobbers */);
7237 return result;
7240 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7241 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7243 int16x8_t result;
7244 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7245 : "=w"(result)
7246 : "0"(a), "w"(b), "w"(c)
7247 : /* No clobbers */);
7248 return result;
7251 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7252 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7254 int32x4_t result;
7255 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7256 : "=w"(result)
7257 : "0"(a), "w"(b), "w"(c)
7258 : /* No clobbers */);
7259 return result;
7262 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7263 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7265 uint8x16_t result;
7266 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7267 : "=w"(result)
7268 : "0"(a), "w"(b), "w"(c)
7269 : /* No clobbers */);
7270 return result;
7273 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7274 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7276 uint16x8_t result;
7277 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7278 : "=w"(result)
7279 : "0"(a), "w"(b), "w"(c)
7280 : /* No clobbers */);
7281 return result;
7284 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7285 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7287 uint32x4_t result;
7288 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7289 : "=w"(result)
7290 : "0"(a), "w"(b), "w"(c)
7291 : /* No clobbers */);
7292 return result;
7295 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7296 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7298 float32x2_t result;
7299 float32x2_t t1;
7300 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7301 : "=w"(result), "=w"(t1)
7302 : "0"(a), "w"(b), "w"(c)
7303 : /* No clobbers */);
7304 return result;
7307 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7308 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7310 int16x4_t result;
7311 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7312 : "=w"(result)
7313 : "0"(a), "w"(b), "x"(c)
7314 : /* No clobbers */);
7315 return result;
7318 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7319 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7321 int32x2_t result;
7322 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7323 : "=w"(result)
7324 : "0"(a), "w"(b), "w"(c)
7325 : /* No clobbers */);
7326 return result;
7329 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7330 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7332 uint16x4_t result;
7333 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7334 : "=w"(result)
7335 : "0"(a), "w"(b), "x"(c)
7336 : /* No clobbers */);
7337 return result;
7340 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7341 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7343 uint32x2_t result;
7344 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7345 : "=w"(result)
7346 : "0"(a), "w"(b), "w"(c)
7347 : /* No clobbers */);
7348 return result;
7351 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7352 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7354 int8x8_t result;
7355 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7356 : "=w"(result)
7357 : "0"(a), "w"(b), "w"(c)
7358 : /* No clobbers */);
7359 return result;
7362 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7363 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7365 int16x4_t result;
7366 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7367 : "=w"(result)
7368 : "0"(a), "w"(b), "w"(c)
7369 : /* No clobbers */);
7370 return result;
7373 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7374 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7376 int32x2_t result;
7377 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7378 : "=w"(result)
7379 : "0"(a), "w"(b), "w"(c)
7380 : /* No clobbers */);
7381 return result;
7384 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7385 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7387 uint8x8_t result;
7388 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7389 : "=w"(result)
7390 : "0"(a), "w"(b), "w"(c)
7391 : /* No clobbers */);
7392 return result;
7395 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7396 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7398 uint16x4_t result;
7399 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7400 : "=w"(result)
7401 : "0"(a), "w"(b), "w"(c)
7402 : /* No clobbers */);
7403 return result;
7406 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7407 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7409 uint32x2_t result;
7410 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7411 : "=w"(result)
7412 : "0"(a), "w"(b), "w"(c)
7413 : /* No clobbers */);
7414 return result;
/* Widening multiply-subtract from the high half, by lane:
   result = a - widen(high(b)) * c[d].  Macros (not functions) because the
   lane index D must be an immediate ("i" constraint).  The `_lane_` forms
   take a 64-bit lane vector, the `_laneq_` forms a 128-bit one.  The "x"
   constraint (V0-V15) is required for 16-bit indexed multiplies.  */

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7529 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7530 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7532 int32x4_t result;
7533 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7534 : "=w"(result)
7535 : "0"(a), "w"(b), "x"(c)
7536 : /* No clobbers */);
7537 return result;
7540 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7541 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7543 int64x2_t result;
7544 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7545 : "=w"(result)
7546 : "0"(a), "w"(b), "w"(c)
7547 : /* No clobbers */);
7548 return result;
7551 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7552 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7554 uint32x4_t result;
7555 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7556 : "=w"(result)
7557 : "0"(a), "w"(b), "x"(c)
7558 : /* No clobbers */);
7559 return result;
7562 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7563 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7565 uint64x2_t result;
7566 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7567 : "=w"(result)
7568 : "0"(a), "w"(b), "w"(c)
7569 : /* No clobbers */);
7570 return result;
7573 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7574 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7576 int16x8_t result;
7577 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7578 : "=w"(result)
7579 : "0"(a), "w"(b), "w"(c)
7580 : /* No clobbers */);
7581 return result;
7584 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7585 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7587 int32x4_t result;
7588 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7589 : "=w"(result)
7590 : "0"(a), "w"(b), "w"(c)
7591 : /* No clobbers */);
7592 return result;
7595 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7596 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7598 int64x2_t result;
7599 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7600 : "=w"(result)
7601 : "0"(a), "w"(b), "w"(c)
7602 : /* No clobbers */);
7603 return result;
7606 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7607 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7609 uint16x8_t result;
7610 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7611 : "=w"(result)
7612 : "0"(a), "w"(b), "w"(c)
7613 : /* No clobbers */);
7614 return result;
7617 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7618 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7620 uint32x4_t result;
7621 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7622 : "=w"(result)
7623 : "0"(a), "w"(b), "w"(c)
7624 : /* No clobbers */);
7625 return result;
7628 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7629 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7631 uint64x2_t result;
7632 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7633 : "=w"(result)
7634 : "0"(a), "w"(b), "w"(c)
7635 : /* No clobbers */);
7636 return result;
/* Widening multiply-subtract by lane: result = a - widen(b) * c[d].
   Macros because the lane index D must be an immediate.  `_lane_` forms
   take a 64-bit lane vector, `_laneq_` forms a 128-bit one.  */

#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7751 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7752 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7754 int32x4_t result;
7755 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7756 : "=w"(result)
7757 : "0"(a), "w"(b), "x"(c)
7758 : /* No clobbers */);
7759 return result;
7762 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7763 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7765 int64x2_t result;
7766 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7767 : "=w"(result)
7768 : "0"(a), "w"(b), "w"(c)
7769 : /* No clobbers */);
7770 return result;
7773 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7774 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7776 uint32x4_t result;
7777 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7778 : "=w"(result)
7779 : "0"(a), "w"(b), "x"(c)
7780 : /* No clobbers */);
7781 return result;
7784 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7785 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7787 uint64x2_t result;
7788 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7789 : "=w"(result)
7790 : "0"(a), "w"(b), "w"(c)
7791 : /* No clobbers */);
7792 return result;
7795 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7796 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7798 int16x8_t result;
7799 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7800 : "=w"(result)
7801 : "0"(a), "w"(b), "w"(c)
7802 : /* No clobbers */);
7803 return result;
7806 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7807 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7809 int32x4_t result;
7810 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7811 : "=w"(result)
7812 : "0"(a), "w"(b), "w"(c)
7813 : /* No clobbers */);
7814 return result;
7817 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7818 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7820 int64x2_t result;
7821 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7822 : "=w"(result)
7823 : "0"(a), "w"(b), "w"(c)
7824 : /* No clobbers */);
7825 return result;
7828 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7829 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7831 uint16x8_t result;
7832 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7833 : "=w"(result)
7834 : "0"(a), "w"(b), "w"(c)
7835 : /* No clobbers */);
7836 return result;
7839 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7840 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7842 uint32x4_t result;
7843 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7844 : "=w"(result)
7845 : "0"(a), "w"(b), "w"(c)
7846 : /* No clobbers */);
7847 return result;
7850 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7851 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7853 uint64x2_t result;
7854 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7855 : "=w"(result)
7856 : "0"(a), "w"(b), "w"(c)
7857 : /* No clobbers */);
7858 return result;
7861 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7862 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7864 float32x4_t result;
7865 float32x4_t t1;
7866 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7867 : "=w"(result), "=w"(t1)
7868 : "0"(a), "w"(b), "w"(c)
7869 : /* No clobbers */);
7870 return result;
7873 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7874 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7876 int16x8_t result;
7877 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7878 : "=w"(result)
7879 : "0"(a), "w"(b), "x"(c)
7880 : /* No clobbers */);
7881 return result;
7884 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7885 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7887 int32x4_t result;
7888 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7889 : "=w"(result)
7890 : "0"(a), "w"(b), "w"(c)
7891 : /* No clobbers */);
7892 return result;
7895 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7896 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7898 uint16x8_t result;
7899 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7900 : "=w"(result)
7901 : "0"(a), "w"(b), "x"(c)
7902 : /* No clobbers */);
7903 return result;
7906 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7907 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7909 uint32x4_t result;
7910 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7911 : "=w"(result)
7912 : "0"(a), "w"(b), "w"(c)
7913 : /* No clobbers */);
7914 return result;
7917 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7918 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7920 int8x16_t result;
7921 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7922 : "=w"(result)
7923 : "0"(a), "w"(b), "w"(c)
7924 : /* No clobbers */);
7925 return result;
7928 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7929 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7931 int16x8_t result;
7932 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7933 : "=w"(result)
7934 : "0"(a), "w"(b), "w"(c)
7935 : /* No clobbers */);
7936 return result;
7939 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7940 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7942 int32x4_t result;
7943 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7944 : "=w"(result)
7945 : "0"(a), "w"(b), "w"(c)
7946 : /* No clobbers */);
7947 return result;
7950 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7951 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7953 uint8x16_t result;
7954 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7955 : "=w"(result)
7956 : "0"(a), "w"(b), "w"(c)
7957 : /* No clobbers */);
7958 return result;
7961 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7962 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7964 uint16x8_t result;
7965 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7966 : "=w"(result)
7967 : "0"(a), "w"(b), "w"(c)
7968 : /* No clobbers */);
7969 return result;
7972 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7973 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7975 uint32x4_t result;
7976 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7977 : "=w"(result)
7978 : "0"(a), "w"(b), "w"(c)
7979 : /* No clobbers */);
7980 return result;
7983 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7984 vmovl_high_s8 (int8x16_t a)
7986 int16x8_t result;
7987 __asm__ ("sshll2 %0.8h,%1.16b,#0"
7988 : "=w"(result)
7989 : "w"(a)
7990 : /* No clobbers */);
7991 return result;
7994 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7995 vmovl_high_s16 (int16x8_t a)
7997 int32x4_t result;
7998 __asm__ ("sshll2 %0.4s,%1.8h,#0"
7999 : "=w"(result)
8000 : "w"(a)
8001 : /* No clobbers */);
8002 return result;
8005 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8006 vmovl_high_s32 (int32x4_t a)
8008 int64x2_t result;
8009 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8010 : "=w"(result)
8011 : "w"(a)
8012 : /* No clobbers */);
8013 return result;
8016 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8017 vmovl_high_u8 (uint8x16_t a)
8019 uint16x8_t result;
8020 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8021 : "=w"(result)
8022 : "w"(a)
8023 : /* No clobbers */);
8024 return result;
8027 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8028 vmovl_high_u16 (uint16x8_t a)
8030 uint32x4_t result;
8031 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8032 : "=w"(result)
8033 : "w"(a)
8034 : /* No clobbers */);
8035 return result;
8038 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8039 vmovl_high_u32 (uint32x4_t a)
8041 uint64x2_t result;
8042 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8043 : "=w"(result)
8044 : "w"(a)
8045 : /* No clobbers */);
8046 return result;
8049 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8050 vmovl_s8 (int8x8_t a)
8052 int16x8_t result;
8053 __asm__ ("sshll %0.8h,%1.8b,#0"
8054 : "=w"(result)
8055 : "w"(a)
8056 : /* No clobbers */);
8057 return result;
8060 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8061 vmovl_s16 (int16x4_t a)
8063 int32x4_t result;
8064 __asm__ ("sshll %0.4s,%1.4h,#0"
8065 : "=w"(result)
8066 : "w"(a)
8067 : /* No clobbers */);
8068 return result;
8071 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8072 vmovl_s32 (int32x2_t a)
8074 int64x2_t result;
8075 __asm__ ("sshll %0.2d,%1.2s,#0"
8076 : "=w"(result)
8077 : "w"(a)
8078 : /* No clobbers */);
8079 return result;
8082 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8083 vmovl_u8 (uint8x8_t a)
8085 uint16x8_t result;
8086 __asm__ ("ushll %0.8h,%1.8b,#0"
8087 : "=w"(result)
8088 : "w"(a)
8089 : /* No clobbers */);
8090 return result;
8093 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8094 vmovl_u16 (uint16x4_t a)
8096 uint32x4_t result;
8097 __asm__ ("ushll %0.4s,%1.4h,#0"
8098 : "=w"(result)
8099 : "w"(a)
8100 : /* No clobbers */);
8101 return result;
8104 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8105 vmovl_u32 (uint32x2_t a)
8107 uint64x2_t result;
8108 __asm__ ("ushll %0.2d,%1.2s,#0"
8109 : "=w"(result)
8110 : "w"(a)
8111 : /* No clobbers */);
8112 return result;
8115 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8116 vmovn_high_s16 (int8x8_t a, int16x8_t b)
8118 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8119 __asm__ ("xtn2 %0.16b,%1.8h"
8120 : "+w"(result)
8121 : "w"(b)
8122 : /* No clobbers */);
8123 return result;
8126 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8127 vmovn_high_s32 (int16x4_t a, int32x4_t b)
8129 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8130 __asm__ ("xtn2 %0.8h,%1.4s"
8131 : "+w"(result)
8132 : "w"(b)
8133 : /* No clobbers */);
8134 return result;
8137 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8138 vmovn_high_s64 (int32x2_t a, int64x2_t b)
8140 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8141 __asm__ ("xtn2 %0.4s,%1.2d"
8142 : "+w"(result)
8143 : "w"(b)
8144 : /* No clobbers */);
8145 return result;
8148 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8149 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8151 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8152 __asm__ ("xtn2 %0.16b,%1.8h"
8153 : "+w"(result)
8154 : "w"(b)
8155 : /* No clobbers */);
8156 return result;
8159 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8160 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8162 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8163 __asm__ ("xtn2 %0.8h,%1.4s"
8164 : "+w"(result)
8165 : "w"(b)
8166 : /* No clobbers */);
8167 return result;
8170 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8171 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8173 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8174 __asm__ ("xtn2 %0.4s,%1.2d"
8175 : "+w"(result)
8176 : "w"(b)
8177 : /* No clobbers */);
8178 return result;
8181 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8182 vmovn_s16 (int16x8_t a)
8184 int8x8_t result;
8185 __asm__ ("xtn %0.8b,%1.8h"
8186 : "=w"(result)
8187 : "w"(a)
8188 : /* No clobbers */);
8189 return result;
8192 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8193 vmovn_s32 (int32x4_t a)
8195 int16x4_t result;
8196 __asm__ ("xtn %0.4h,%1.4s"
8197 : "=w"(result)
8198 : "w"(a)
8199 : /* No clobbers */);
8200 return result;
8203 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8204 vmovn_s64 (int64x2_t a)
8206 int32x2_t result;
8207 __asm__ ("xtn %0.2s,%1.2d"
8208 : "=w"(result)
8209 : "w"(a)
8210 : /* No clobbers */);
8211 return result;
8214 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8215 vmovn_u16 (uint16x8_t a)
8217 uint8x8_t result;
8218 __asm__ ("xtn %0.8b,%1.8h"
8219 : "=w"(result)
8220 : "w"(a)
8221 : /* No clobbers */);
8222 return result;
8225 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8226 vmovn_u32 (uint32x4_t a)
8228 uint16x4_t result;
8229 __asm__ ("xtn %0.4h,%1.4s"
8230 : "=w"(result)
8231 : "w"(a)
8232 : /* No clobbers */);
8233 return result;
8236 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8237 vmovn_u64 (uint64x2_t a)
8239 uint32x2_t result;
8240 __asm__ ("xtn %0.2s,%1.2d"
8241 : "=w"(result)
8242 : "w"(a)
8243 : /* No clobbers */);
8244 return result;
8247 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8248 vmul_n_f32 (float32x2_t a, float32_t b)
8250 float32x2_t result;
8251 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
8252 : "=w"(result)
8253 : "w"(a), "w"(b)
8254 : /* No clobbers */);
8255 return result;
8258 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8259 vmul_n_s16 (int16x4_t a, int16_t b)
8261 int16x4_t result;
8262 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8263 : "=w"(result)
8264 : "w"(a), "x"(b)
8265 : /* No clobbers */);
8266 return result;
8269 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8270 vmul_n_s32 (int32x2_t a, int32_t b)
8272 int32x2_t result;
8273 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8274 : "=w"(result)
8275 : "w"(a), "w"(b)
8276 : /* No clobbers */);
8277 return result;
8280 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8281 vmul_n_u16 (uint16x4_t a, uint16_t b)
8283 uint16x4_t result;
8284 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8285 : "=w"(result)
8286 : "w"(a), "x"(b)
8287 : /* No clobbers */);
8288 return result;
8291 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8292 vmul_n_u32 (uint32x2_t a, uint32_t b)
8294 uint32x2_t result;
8295 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8296 : "=w"(result)
8297 : "w"(a), "w"(b)
8298 : /* No clobbers */);
8299 return result;
/* Scalar double multiply by vector lane: result = a * b[c].
   NOTE(review): B is typed float64x2_t here, which matches the ACLE
   signature of vmuld_laneq_f64 rather than vmuld_lane_f64 (whose lane
   argument is float64x1_t) — confirm against the ACLE specification.  */
#define vmuld_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmul %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Widening multiply of the high half by lane:
   result = widen(high(a)) * b[c].  Macros because the lane index C must
   be an immediate.  Fix: the `_lane_` variants previously declared the
   lane operand B as a 128-bit vector; per ACLE (and consistent with the
   vmlsl_high_lane_* macros in this file) the `_lane_` forms take a
   64-bit vector while only the `_laneq_` forms take a 128-bit one.  */

#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8419 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8420 vmull_high_n_s16 (int16x8_t a, int16_t b)
8422 int32x4_t result;
8423 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
8424 : "=w"(result)
8425 : "w"(a), "x"(b)
8426 : /* No clobbers */);
8427 return result;
8430 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8431 vmull_high_n_s32 (int32x4_t a, int32_t b)
8433 int64x2_t result;
8434 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
8435 : "=w"(result)
8436 : "w"(a), "w"(b)
8437 : /* No clobbers */);
8438 return result;
8441 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8442 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
8444 uint32x4_t result;
8445 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
8446 : "=w"(result)
8447 : "w"(a), "x"(b)
8448 : /* No clobbers */);
8449 return result;
8452 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8453 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
8455 uint64x2_t result;
8456 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
8457 : "=w"(result)
8458 : "w"(a), "w"(b)
8459 : /* No clobbers */);
8460 return result;
8463 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8464 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
8466 poly16x8_t result;
8467 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
8468 : "=w"(result)
8469 : "w"(a), "w"(b)
8470 : /* No clobbers */);
8471 return result;
8474 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8475 vmull_high_s8 (int8x16_t a, int8x16_t b)
8477 int16x8_t result;
8478 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
8479 : "=w"(result)
8480 : "w"(a), "w"(b)
8481 : /* No clobbers */);
8482 return result;
8485 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8486 vmull_high_s16 (int16x8_t a, int16x8_t b)
8488 int32x4_t result;
8489 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
8490 : "=w"(result)
8491 : "w"(a), "w"(b)
8492 : /* No clobbers */);
8493 return result;
8496 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8497 vmull_high_s32 (int32x4_t a, int32x4_t b)
8499 int64x2_t result;
8500 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
8501 : "=w"(result)
8502 : "w"(a), "w"(b)
8503 : /* No clobbers */);
8504 return result;
8507 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8508 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
8510 uint16x8_t result;
8511 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
8512 : "=w"(result)
8513 : "w"(a), "w"(b)
8514 : /* No clobbers */);
8515 return result;
8518 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8519 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
8521 uint32x4_t result;
8522 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
8523 : "=w"(result)
8524 : "w"(a), "w"(b)
8525 : /* No clobbers */);
8526 return result;
8529 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8530 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
8532 uint64x2_t result;
8533 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
8534 : "=w"(result)
8535 : "w"(a), "w"(b)
8536 : /* No clobbers */);
8537 return result;
/* vmull_lane_<t>: widening multiply by a selected lane of a 64-bit
   vector.  Macros (not functions) because the lane number 'c' must be
   an immediate ("i" constraint).  The "x" constraint on 16-bit data
   restricts the register to V0-V15, as required by the by-element
   forms of SMULL/UMULL.  */

#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_laneq_<t>: widening multiply by a selected lane of a 128-bit
   ("q") vector.  Macros because the lane index 'c' must be a compile
   time immediate.  */

#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8644 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8645 vmull_n_s16 (int16x4_t a, int16_t b)
8647 int32x4_t result;
8648 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
8649 : "=w"(result)
8650 : "w"(a), "x"(b)
8651 : /* No clobbers */);
8652 return result;
8655 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8656 vmull_n_s32 (int32x2_t a, int32_t b)
8658 int64x2_t result;
8659 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
8660 : "=w"(result)
8661 : "w"(a), "w"(b)
8662 : /* No clobbers */);
8663 return result;
8666 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8667 vmull_n_u16 (uint16x4_t a, uint16_t b)
8669 uint32x4_t result;
8670 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
8671 : "=w"(result)
8672 : "w"(a), "x"(b)
8673 : /* No clobbers */);
8674 return result;
8677 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8678 vmull_n_u32 (uint32x2_t a, uint32_t b)
8680 uint64x2_t result;
8681 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8682 : "=w"(result)
8683 : "w"(a), "w"(b)
8684 : /* No clobbers */);
8685 return result;
8688 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8689 vmull_p8 (poly8x8_t a, poly8x8_t b)
8691 poly16x8_t result;
8692 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8693 : "=w"(result)
8694 : "w"(a), "w"(b)
8695 : /* No clobbers */);
8696 return result;
8699 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8700 vmull_s8 (int8x8_t a, int8x8_t b)
8702 int16x8_t result;
8703 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8704 : "=w"(result)
8705 : "w"(a), "w"(b)
8706 : /* No clobbers */);
8707 return result;
8710 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8711 vmull_s16 (int16x4_t a, int16x4_t b)
8713 int32x4_t result;
8714 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8715 : "=w"(result)
8716 : "w"(a), "w"(b)
8717 : /* No clobbers */);
8718 return result;
8721 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8722 vmull_s32 (int32x2_t a, int32x2_t b)
8724 int64x2_t result;
8725 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8726 : "=w"(result)
8727 : "w"(a), "w"(b)
8728 : /* No clobbers */);
8729 return result;
8732 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8733 vmull_u8 (uint8x8_t a, uint8x8_t b)
8735 uint16x8_t result;
8736 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8737 : "=w"(result)
8738 : "w"(a), "w"(b)
8739 : /* No clobbers */);
8740 return result;
8743 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8744 vmull_u16 (uint16x4_t a, uint16x4_t b)
8746 uint32x4_t result;
8747 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8748 : "=w"(result)
8749 : "w"(a), "w"(b)
8750 : /* No clobbers */);
8751 return result;
8754 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8755 vmull_u32 (uint32x2_t a, uint32x2_t b)
8757 uint64x2_t result;
8758 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8759 : "=w"(result)
8760 : "w"(a), "w"(b)
8761 : /* No clobbers */);
8762 return result;
8765 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8766 vmulq_n_f32 (float32x4_t a, float32_t b)
8768 float32x4_t result;
8769 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8770 : "=w"(result)
8771 : "w"(a), "w"(b)
8772 : /* No clobbers */);
8773 return result;
8776 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8777 vmulq_n_f64 (float64x2_t a, float64_t b)
8779 float64x2_t result;
8780 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8781 : "=w"(result)
8782 : "w"(a), "w"(b)
8783 : /* No clobbers */);
8784 return result;
8787 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8788 vmulq_n_s16 (int16x8_t a, int16_t b)
8790 int16x8_t result;
8791 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8792 : "=w"(result)
8793 : "w"(a), "x"(b)
8794 : /* No clobbers */);
8795 return result;
8798 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8799 vmulq_n_s32 (int32x4_t a, int32_t b)
8801 int32x4_t result;
8802 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8803 : "=w"(result)
8804 : "w"(a), "w"(b)
8805 : /* No clobbers */);
8806 return result;
8809 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8810 vmulq_n_u16 (uint16x8_t a, uint16_t b)
8812 uint16x8_t result;
8813 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8814 : "=w"(result)
8815 : "w"(a), "x"(b)
8816 : /* No clobbers */);
8817 return result;
8820 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8821 vmulq_n_u32 (uint32x4_t a, uint32_t b)
8823 uint32x4_t result;
8824 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8825 : "=w"(result)
8826 : "w"(a), "w"(b)
8827 : /* No clobbers */);
8828 return result;
/* vmuls_lane_f32: scalar float multiply by a selected lane of a
   128-bit vector.  Macro because the lane index must be immediate.  */
#define vmuls_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmul %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8844 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8845 vmulx_f32 (float32x2_t a, float32x2_t b)
8847 float32x2_t result;
8848 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
8849 : "=w"(result)
8850 : "w"(a), "w"(b)
8851 : /* No clobbers */);
8852 return result;
8855 #define vmulx_lane_f32(a, b, c) \
8856 __extension__ \
8857 ({ \
8858 float32x4_t b_ = (b); \
8859 float32x2_t a_ = (a); \
8860 float32x2_t result; \
8861 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
8862 : "=w"(result) \
8863 : "w"(a_), "w"(b_), "i"(c) \
8864 : /* No clobbers */); \
8865 result; \
8868 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
8869 vmulxd_f64 (float64_t a, float64_t b)
8871 float64_t result;
8872 __asm__ ("fmulx %d0, %d1, %d2"
8873 : "=w"(result)
8874 : "w"(a), "w"(b)
8875 : /* No clobbers */);
8876 return result;
8879 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8880 vmulxq_f32 (float32x4_t a, float32x4_t b)
8882 float32x4_t result;
8883 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
8884 : "=w"(result)
8885 : "w"(a), "w"(b)
8886 : /* No clobbers */);
8887 return result;
8890 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8891 vmulxq_f64 (float64x2_t a, float64x2_t b)
8893 float64x2_t result;
8894 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
8895 : "=w"(result)
8896 : "w"(a), "w"(b)
8897 : /* No clobbers */);
8898 return result;
8901 #define vmulxq_lane_f32(a, b, c) \
8902 __extension__ \
8903 ({ \
8904 float32x4_t b_ = (b); \
8905 float32x4_t a_ = (a); \
8906 float32x4_t result; \
8907 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
8908 : "=w"(result) \
8909 : "w"(a_), "w"(b_), "i"(c) \
8910 : /* No clobbers */); \
8911 result; \
8914 #define vmulxq_lane_f64(a, b, c) \
8915 __extension__ \
8916 ({ \
8917 float64x2_t b_ = (b); \
8918 float64x2_t a_ = (a); \
8919 float64x2_t result; \
8920 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
8921 : "=w"(result) \
8922 : "w"(a_), "w"(b_), "i"(c) \
8923 : /* No clobbers */); \
8924 result; \
8927 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
8928 vmulxs_f32 (float32_t a, float32_t b)
8930 float32_t result;
8931 __asm__ ("fmulx %s0, %s1, %s2"
8932 : "=w"(result)
8933 : "w"(a), "w"(b)
8934 : /* No clobbers */);
8935 return result;
8938 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8939 vmvn_p8 (poly8x8_t a)
8941 poly8x8_t result;
8942 __asm__ ("mvn %0.8b,%1.8b"
8943 : "=w"(result)
8944 : "w"(a)
8945 : /* No clobbers */);
8946 return result;
8949 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8950 vmvn_s8 (int8x8_t a)
8952 int8x8_t result;
8953 __asm__ ("mvn %0.8b,%1.8b"
8954 : "=w"(result)
8955 : "w"(a)
8956 : /* No clobbers */);
8957 return result;
8960 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8961 vmvn_s16 (int16x4_t a)
8963 int16x4_t result;
8964 __asm__ ("mvn %0.8b,%1.8b"
8965 : "=w"(result)
8966 : "w"(a)
8967 : /* No clobbers */);
8968 return result;
8971 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8972 vmvn_s32 (int32x2_t a)
8974 int32x2_t result;
8975 __asm__ ("mvn %0.8b,%1.8b"
8976 : "=w"(result)
8977 : "w"(a)
8978 : /* No clobbers */);
8979 return result;
8982 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8983 vmvn_u8 (uint8x8_t a)
8985 uint8x8_t result;
8986 __asm__ ("mvn %0.8b,%1.8b"
8987 : "=w"(result)
8988 : "w"(a)
8989 : /* No clobbers */);
8990 return result;
8993 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8994 vmvn_u16 (uint16x4_t a)
8996 uint16x4_t result;
8997 __asm__ ("mvn %0.8b,%1.8b"
8998 : "=w"(result)
8999 : "w"(a)
9000 : /* No clobbers */);
9001 return result;
9004 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9005 vmvn_u32 (uint32x2_t a)
9007 uint32x2_t result;
9008 __asm__ ("mvn %0.8b,%1.8b"
9009 : "=w"(result)
9010 : "w"(a)
9011 : /* No clobbers */);
9012 return result;
9015 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9016 vmvnq_p8 (poly8x16_t a)
9018 poly8x16_t result;
9019 __asm__ ("mvn %0.16b,%1.16b"
9020 : "=w"(result)
9021 : "w"(a)
9022 : /* No clobbers */);
9023 return result;
9026 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9027 vmvnq_s8 (int8x16_t a)
9029 int8x16_t result;
9030 __asm__ ("mvn %0.16b,%1.16b"
9031 : "=w"(result)
9032 : "w"(a)
9033 : /* No clobbers */);
9034 return result;
9037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9038 vmvnq_s16 (int16x8_t a)
9040 int16x8_t result;
9041 __asm__ ("mvn %0.16b,%1.16b"
9042 : "=w"(result)
9043 : "w"(a)
9044 : /* No clobbers */);
9045 return result;
9048 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9049 vmvnq_s32 (int32x4_t a)
9051 int32x4_t result;
9052 __asm__ ("mvn %0.16b,%1.16b"
9053 : "=w"(result)
9054 : "w"(a)
9055 : /* No clobbers */);
9056 return result;
9059 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9060 vmvnq_u8 (uint8x16_t a)
9062 uint8x16_t result;
9063 __asm__ ("mvn %0.16b,%1.16b"
9064 : "=w"(result)
9065 : "w"(a)
9066 : /* No clobbers */);
9067 return result;
9070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9071 vmvnq_u16 (uint16x8_t a)
9073 uint16x8_t result;
9074 __asm__ ("mvn %0.16b,%1.16b"
9075 : "=w"(result)
9076 : "w"(a)
9077 : /* No clobbers */);
9078 return result;
9081 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9082 vmvnq_u32 (uint32x4_t a)
9084 uint32x4_t result;
9085 __asm__ ("mvn %0.16b,%1.16b"
9086 : "=w"(result)
9087 : "w"(a)
9088 : /* No clobbers */);
9089 return result;
9093 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9094 vpadal_s8 (int16x4_t a, int8x8_t b)
9096 int16x4_t result;
9097 __asm__ ("sadalp %0.4h,%2.8b"
9098 : "=w"(result)
9099 : "0"(a), "w"(b)
9100 : /* No clobbers */);
9101 return result;
9104 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9105 vpadal_s16 (int32x2_t a, int16x4_t b)
9107 int32x2_t result;
9108 __asm__ ("sadalp %0.2s,%2.4h"
9109 : "=w"(result)
9110 : "0"(a), "w"(b)
9111 : /* No clobbers */);
9112 return result;
9115 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9116 vpadal_s32 (int64x1_t a, int32x2_t b)
9118 int64x1_t result;
9119 __asm__ ("sadalp %0.1d,%2.2s"
9120 : "=w"(result)
9121 : "0"(a), "w"(b)
9122 : /* No clobbers */);
9123 return result;
9126 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9127 vpadal_u8 (uint16x4_t a, uint8x8_t b)
9129 uint16x4_t result;
9130 __asm__ ("uadalp %0.4h,%2.8b"
9131 : "=w"(result)
9132 : "0"(a), "w"(b)
9133 : /* No clobbers */);
9134 return result;
9137 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9138 vpadal_u16 (uint32x2_t a, uint16x4_t b)
9140 uint32x2_t result;
9141 __asm__ ("uadalp %0.2s,%2.4h"
9142 : "=w"(result)
9143 : "0"(a), "w"(b)
9144 : /* No clobbers */);
9145 return result;
9148 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9149 vpadal_u32 (uint64x1_t a, uint32x2_t b)
9151 uint64x1_t result;
9152 __asm__ ("uadalp %0.1d,%2.2s"
9153 : "=w"(result)
9154 : "0"(a), "w"(b)
9155 : /* No clobbers */);
9156 return result;
9159 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9160 vpadalq_s8 (int16x8_t a, int8x16_t b)
9162 int16x8_t result;
9163 __asm__ ("sadalp %0.8h,%2.16b"
9164 : "=w"(result)
9165 : "0"(a), "w"(b)
9166 : /* No clobbers */);
9167 return result;
9170 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9171 vpadalq_s16 (int32x4_t a, int16x8_t b)
9173 int32x4_t result;
9174 __asm__ ("sadalp %0.4s,%2.8h"
9175 : "=w"(result)
9176 : "0"(a), "w"(b)
9177 : /* No clobbers */);
9178 return result;
9181 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9182 vpadalq_s32 (int64x2_t a, int32x4_t b)
9184 int64x2_t result;
9185 __asm__ ("sadalp %0.2d,%2.4s"
9186 : "=w"(result)
9187 : "0"(a), "w"(b)
9188 : /* No clobbers */);
9189 return result;
9192 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9193 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
9195 uint16x8_t result;
9196 __asm__ ("uadalp %0.8h,%2.16b"
9197 : "=w"(result)
9198 : "0"(a), "w"(b)
9199 : /* No clobbers */);
9200 return result;
9203 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9204 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
9206 uint32x4_t result;
9207 __asm__ ("uadalp %0.4s,%2.8h"
9208 : "=w"(result)
9209 : "0"(a), "w"(b)
9210 : /* No clobbers */);
9211 return result;
9214 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9215 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
9217 uint64x2_t result;
9218 __asm__ ("uadalp %0.2d,%2.4s"
9219 : "=w"(result)
9220 : "0"(a), "w"(b)
9221 : /* No clobbers */);
9222 return result;
9225 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9226 vpadd_f32 (float32x2_t a, float32x2_t b)
9228 float32x2_t result;
9229 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
9230 : "=w"(result)
9231 : "w"(a), "w"(b)
9232 : /* No clobbers */);
9233 return result;
9236 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9237 vpadd_s8 (int8x8_t __a, int8x8_t __b)
9239 return __builtin_aarch64_addpv8qi (__a, __b);
9242 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9243 vpadd_s16 (int16x4_t __a, int16x4_t __b)
9245 return __builtin_aarch64_addpv4hi (__a, __b);
9248 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9249 vpadd_s32 (int32x2_t __a, int32x2_t __b)
9251 return __builtin_aarch64_addpv2si (__a, __b);
9254 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9255 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
9257 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
9258 (int8x8_t) __b);
9261 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9262 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
9264 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
9265 (int16x4_t) __b);
9268 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9269 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
9271 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
9272 (int32x2_t) __b);
9275 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9276 vpaddd_f64 (float64x2_t a)
9278 float64_t result;
9279 __asm__ ("faddp %d0,%1.2d"
9280 : "=w"(result)
9281 : "w"(a)
9282 : /* No clobbers */);
9283 return result;
9286 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9287 vpaddl_s8 (int8x8_t a)
9289 int16x4_t result;
9290 __asm__ ("saddlp %0.4h,%1.8b"
9291 : "=w"(result)
9292 : "w"(a)
9293 : /* No clobbers */);
9294 return result;
9297 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9298 vpaddl_s16 (int16x4_t a)
9300 int32x2_t result;
9301 __asm__ ("saddlp %0.2s,%1.4h"
9302 : "=w"(result)
9303 : "w"(a)
9304 : /* No clobbers */);
9305 return result;
9308 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9309 vpaddl_s32 (int32x2_t a)
9311 int64x1_t result;
9312 __asm__ ("saddlp %0.1d,%1.2s"
9313 : "=w"(result)
9314 : "w"(a)
9315 : /* No clobbers */);
9316 return result;
9319 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9320 vpaddl_u8 (uint8x8_t a)
9322 uint16x4_t result;
9323 __asm__ ("uaddlp %0.4h,%1.8b"
9324 : "=w"(result)
9325 : "w"(a)
9326 : /* No clobbers */);
9327 return result;
9330 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9331 vpaddl_u16 (uint16x4_t a)
9333 uint32x2_t result;
9334 __asm__ ("uaddlp %0.2s,%1.4h"
9335 : "=w"(result)
9336 : "w"(a)
9337 : /* No clobbers */);
9338 return result;
9341 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9342 vpaddl_u32 (uint32x2_t a)
9344 uint64x1_t result;
9345 __asm__ ("uaddlp %0.1d,%1.2s"
9346 : "=w"(result)
9347 : "w"(a)
9348 : /* No clobbers */);
9349 return result;
9352 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9353 vpaddlq_s8 (int8x16_t a)
9355 int16x8_t result;
9356 __asm__ ("saddlp %0.8h,%1.16b"
9357 : "=w"(result)
9358 : "w"(a)
9359 : /* No clobbers */);
9360 return result;
9363 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9364 vpaddlq_s16 (int16x8_t a)
9366 int32x4_t result;
9367 __asm__ ("saddlp %0.4s,%1.8h"
9368 : "=w"(result)
9369 : "w"(a)
9370 : /* No clobbers */);
9371 return result;
9374 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9375 vpaddlq_s32 (int32x4_t a)
9377 int64x2_t result;
9378 __asm__ ("saddlp %0.2d,%1.4s"
9379 : "=w"(result)
9380 : "w"(a)
9381 : /* No clobbers */);
9382 return result;
9385 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9386 vpaddlq_u8 (uint8x16_t a)
9388 uint16x8_t result;
9389 __asm__ ("uaddlp %0.8h,%1.16b"
9390 : "=w"(result)
9391 : "w"(a)
9392 : /* No clobbers */);
9393 return result;
9396 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9397 vpaddlq_u16 (uint16x8_t a)
9399 uint32x4_t result;
9400 __asm__ ("uaddlp %0.4s,%1.8h"
9401 : "=w"(result)
9402 : "w"(a)
9403 : /* No clobbers */);
9404 return result;
9407 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9408 vpaddlq_u32 (uint32x4_t a)
9410 uint64x2_t result;
9411 __asm__ ("uaddlp %0.2d,%1.4s"
9412 : "=w"(result)
9413 : "w"(a)
9414 : /* No clobbers */);
9415 return result;
9418 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9419 vpaddq_f32 (float32x4_t a, float32x4_t b)
9421 float32x4_t result;
9422 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
9423 : "=w"(result)
9424 : "w"(a), "w"(b)
9425 : /* No clobbers */);
9426 return result;
9429 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9430 vpaddq_f64 (float64x2_t a, float64x2_t b)
9432 float64x2_t result;
9433 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
9434 : "=w"(result)
9435 : "w"(a), "w"(b)
9436 : /* No clobbers */);
9437 return result;
9440 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9441 vpaddq_s8 (int8x16_t a, int8x16_t b)
9443 int8x16_t result;
9444 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9445 : "=w"(result)
9446 : "w"(a), "w"(b)
9447 : /* No clobbers */);
9448 return result;
9451 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9452 vpaddq_s16 (int16x8_t a, int16x8_t b)
9454 int16x8_t result;
9455 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9456 : "=w"(result)
9457 : "w"(a), "w"(b)
9458 : /* No clobbers */);
9459 return result;
9462 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9463 vpaddq_s32 (int32x4_t a, int32x4_t b)
9465 int32x4_t result;
9466 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9467 : "=w"(result)
9468 : "w"(a), "w"(b)
9469 : /* No clobbers */);
9470 return result;
9473 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9474 vpaddq_s64 (int64x2_t a, int64x2_t b)
9476 int64x2_t result;
9477 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9478 : "=w"(result)
9479 : "w"(a), "w"(b)
9480 : /* No clobbers */);
9481 return result;
9484 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9485 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
9487 uint8x16_t result;
9488 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9489 : "=w"(result)
9490 : "w"(a), "w"(b)
9491 : /* No clobbers */);
9492 return result;
9495 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9496 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
9498 uint16x8_t result;
9499 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9500 : "=w"(result)
9501 : "w"(a), "w"(b)
9502 : /* No clobbers */);
9503 return result;
9506 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9507 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
9509 uint32x4_t result;
9510 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9511 : "=w"(result)
9512 : "w"(a), "w"(b)
9513 : /* No clobbers */);
9514 return result;
9517 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9518 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9520 uint64x2_t result;
9521 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9522 : "=w"(result)
9523 : "w"(a), "w"(b)
9524 : /* No clobbers */);
9525 return result;
9528 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9529 vpadds_f32 (float32x2_t a)
9531 float32_t result;
9532 __asm__ ("faddp %s0,%1.2s"
9533 : "=w"(result)
9534 : "w"(a)
9535 : /* No clobbers */);
9536 return result;
9539 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9540 vpmax_f32 (float32x2_t a, float32x2_t b)
9542 float32x2_t result;
9543 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
9544 : "=w"(result)
9545 : "w"(a), "w"(b)
9546 : /* No clobbers */);
9547 return result;
9550 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9551 vpmax_s8 (int8x8_t a, int8x8_t b)
9553 int8x8_t result;
9554 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
9555 : "=w"(result)
9556 : "w"(a), "w"(b)
9557 : /* No clobbers */);
9558 return result;
9561 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9562 vpmax_s16 (int16x4_t a, int16x4_t b)
9564 int16x4_t result;
9565 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
9566 : "=w"(result)
9567 : "w"(a), "w"(b)
9568 : /* No clobbers */);
9569 return result;
9572 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9573 vpmax_s32 (int32x2_t a, int32x2_t b)
9575 int32x2_t result;
9576 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
9577 : "=w"(result)
9578 : "w"(a), "w"(b)
9579 : /* No clobbers */);
9580 return result;
9583 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9584 vpmax_u8 (uint8x8_t a, uint8x8_t b)
9586 uint8x8_t result;
9587 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
9588 : "=w"(result)
9589 : "w"(a), "w"(b)
9590 : /* No clobbers */);
9591 return result;
9594 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9595 vpmax_u16 (uint16x4_t a, uint16x4_t b)
9597 uint16x4_t result;
9598 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
9599 : "=w"(result)
9600 : "w"(a), "w"(b)
9601 : /* No clobbers */);
9602 return result;
9605 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9606 vpmax_u32 (uint32x2_t a, uint32x2_t b)
9608 uint32x2_t result;
9609 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
9610 : "=w"(result)
9611 : "w"(a), "w"(b)
9612 : /* No clobbers */);
9613 return result;
9616 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9617 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
9619 float32x2_t result;
9620 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
9621 : "=w"(result)
9622 : "w"(a), "w"(b)
9623 : /* No clobbers */);
9624 return result;
9627 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9628 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
9630 float32x4_t result;
9631 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
9632 : "=w"(result)
9633 : "w"(a), "w"(b)
9634 : /* No clobbers */);
9635 return result;
9638 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9639 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
9641 float64x2_t result;
9642 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
9643 : "=w"(result)
9644 : "w"(a), "w"(b)
9645 : /* No clobbers */);
9646 return result;
9649 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9650 vpmaxnmqd_f64 (float64x2_t a)
9652 float64_t result;
9653 __asm__ ("fmaxnmp %d0,%1.2d"
9654 : "=w"(result)
9655 : "w"(a)
9656 : /* No clobbers */);
9657 return result;
9660 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9661 vpmaxnms_f32 (float32x2_t a)
9663 float32_t result;
9664 __asm__ ("fmaxnmp %s0,%1.2s"
9665 : "=w"(result)
9666 : "w"(a)
9667 : /* No clobbers */);
9668 return result;
9671 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9672 vpmaxq_f32 (float32x4_t a, float32x4_t b)
9674 float32x4_t result;
9675 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
9676 : "=w"(result)
9677 : "w"(a), "w"(b)
9678 : /* No clobbers */);
9679 return result;
9682 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9683 vpmaxq_f64 (float64x2_t a, float64x2_t b)
9685 float64x2_t result;
9686 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
9687 : "=w"(result)
9688 : "w"(a), "w"(b)
9689 : /* No clobbers */);
9690 return result;
9693 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9694 vpmaxq_s8 (int8x16_t a, int8x16_t b)
9696 int8x16_t result;
9697 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
9698 : "=w"(result)
9699 : "w"(a), "w"(b)
9700 : /* No clobbers */);
9701 return result;
9704 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9705 vpmaxq_s16 (int16x8_t a, int16x8_t b)
9707 int16x8_t result;
9708 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
9709 : "=w"(result)
9710 : "w"(a), "w"(b)
9711 : /* No clobbers */);
9712 return result;
9715 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9716 vpmaxq_s32 (int32x4_t a, int32x4_t b)
9718 int32x4_t result;
9719 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
9720 : "=w"(result)
9721 : "w"(a), "w"(b)
9722 : /* No clobbers */);
9723 return result;
9726 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9727 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
9729 uint8x16_t result;
9730 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
9731 : "=w"(result)
9732 : "w"(a), "w"(b)
9733 : /* No clobbers */);
9734 return result;
9737 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9738 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
9740 uint16x8_t result;
9741 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
9742 : "=w"(result)
9743 : "w"(a), "w"(b)
9744 : /* No clobbers */);
9745 return result;
9748 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9749 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
9751 uint32x4_t result;
9752 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
9753 : "=w"(result)
9754 : "w"(a), "w"(b)
9755 : /* No clobbers */);
9756 return result;
9759 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9760 vpmaxqd_f64 (float64x2_t a)
9762 float64_t result;
9763 __asm__ ("fmaxp %d0,%1.2d"
9764 : "=w"(result)
9765 : "w"(a)
9766 : /* No clobbers */);
9767 return result;
9770 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9771 vpmaxs_f32 (float32x2_t a)
9773 float32_t result;
9774 __asm__ ("fmaxp %s0,%1.2s"
9775 : "=w"(result)
9776 : "w"(a)
9777 : /* No clobbers */);
9778 return result;
9781 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9782 vpmin_f32 (float32x2_t a, float32x2_t b)
9784 float32x2_t result;
9785 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
9786 : "=w"(result)
9787 : "w"(a), "w"(b)
9788 : /* No clobbers */);
9789 return result;
9792 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9793 vpmin_s8 (int8x8_t a, int8x8_t b)
9795 int8x8_t result;
9796 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
9797 : "=w"(result)
9798 : "w"(a), "w"(b)
9799 : /* No clobbers */);
9800 return result;
9803 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9804 vpmin_s16 (int16x4_t a, int16x4_t b)
9806 int16x4_t result;
9807 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
9808 : "=w"(result)
9809 : "w"(a), "w"(b)
9810 : /* No clobbers */);
9811 return result;
9814 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9815 vpmin_s32 (int32x2_t a, int32x2_t b)
9817 int32x2_t result;
9818 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
9819 : "=w"(result)
9820 : "w"(a), "w"(b)
9821 : /* No clobbers */);
9822 return result;
9825 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9826 vpmin_u8 (uint8x8_t a, uint8x8_t b)
9828 uint8x8_t result;
9829 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
9830 : "=w"(result)
9831 : "w"(a), "w"(b)
9832 : /* No clobbers */);
9833 return result;
9836 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9837 vpmin_u16 (uint16x4_t a, uint16x4_t b)
9839 uint16x4_t result;
9840 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
9841 : "=w"(result)
9842 : "w"(a), "w"(b)
9843 : /* No clobbers */);
9844 return result;
9847 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9848 vpmin_u32 (uint32x2_t a, uint32x2_t b)
9850 uint32x2_t result;
9851 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
9852 : "=w"(result)
9853 : "w"(a), "w"(b)
9854 : /* No clobbers */);
9855 return result;
9858 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9859 vpminnm_f32 (float32x2_t a, float32x2_t b)
9861 float32x2_t result;
9862 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
9863 : "=w"(result)
9864 : "w"(a), "w"(b)
9865 : /* No clobbers */);
9866 return result;
9869 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9870 vpminnmq_f32 (float32x4_t a, float32x4_t b)
9872 float32x4_t result;
9873 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
9874 : "=w"(result)
9875 : "w"(a), "w"(b)
9876 : /* No clobbers */);
9877 return result;
9880 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9881 vpminnmq_f64 (float64x2_t a, float64x2_t b)
9883 float64x2_t result;
9884 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
9885 : "=w"(result)
9886 : "w"(a), "w"(b)
9887 : /* No clobbers */);
9888 return result;
9891 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9892 vpminnmqd_f64 (float64x2_t a)
9894 float64_t result;
9895 __asm__ ("fminnmp %d0,%1.2d"
9896 : "=w"(result)
9897 : "w"(a)
9898 : /* No clobbers */);
9899 return result;
9902 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9903 vpminnms_f32 (float32x2_t a)
9905 float32_t result;
9906 __asm__ ("fminnmp %s0,%1.2s"
9907 : "=w"(result)
9908 : "w"(a)
9909 : /* No clobbers */);
9910 return result;
9913 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9914 vpminq_f32 (float32x4_t a, float32x4_t b)
9916 float32x4_t result;
9917 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
9918 : "=w"(result)
9919 : "w"(a), "w"(b)
9920 : /* No clobbers */);
9921 return result;
9924 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9925 vpminq_f64 (float64x2_t a, float64x2_t b)
9927 float64x2_t result;
9928 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
9929 : "=w"(result)
9930 : "w"(a), "w"(b)
9931 : /* No clobbers */);
9932 return result;
9935 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9936 vpminq_s8 (int8x16_t a, int8x16_t b)
9938 int8x16_t result;
9939 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
9940 : "=w"(result)
9941 : "w"(a), "w"(b)
9942 : /* No clobbers */);
9943 return result;
9946 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9947 vpminq_s16 (int16x8_t a, int16x8_t b)
9949 int16x8_t result;
9950 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
9951 : "=w"(result)
9952 : "w"(a), "w"(b)
9953 : /* No clobbers */);
9954 return result;
9957 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9958 vpminq_s32 (int32x4_t a, int32x4_t b)
9960 int32x4_t result;
9961 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
9962 : "=w"(result)
9963 : "w"(a), "w"(b)
9964 : /* No clobbers */);
9965 return result;
9968 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9969 vpminq_u8 (uint8x16_t a, uint8x16_t b)
9971 uint8x16_t result;
9972 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
9973 : "=w"(result)
9974 : "w"(a), "w"(b)
9975 : /* No clobbers */);
9976 return result;
9979 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9980 vpminq_u16 (uint16x8_t a, uint16x8_t b)
9982 uint16x8_t result;
9983 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
9984 : "=w"(result)
9985 : "w"(a), "w"(b)
9986 : /* No clobbers */);
9987 return result;
9990 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9991 vpminq_u32 (uint32x4_t a, uint32x4_t b)
9993 uint32x4_t result;
9994 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
9995 : "=w"(result)
9996 : "w"(a), "w"(b)
9997 : /* No clobbers */);
9998 return result;
10001 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10002 vpminqd_f64 (float64x2_t a)
10004 float64_t result;
10005 __asm__ ("fminp %d0,%1.2d"
10006 : "=w"(result)
10007 : "w"(a)
10008 : /* No clobbers */);
10009 return result;
10012 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10013 vpmins_f32 (float32x2_t a)
10015 float32_t result;
10016 __asm__ ("fminp %s0,%1.2s"
10017 : "=w"(result)
10018 : "w"(a)
10019 : /* No clobbers */);
10020 return result;
10023 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10024 vqdmulh_n_s16 (int16x4_t a, int16_t b)
10026 int16x4_t result;
10027 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
10028 : "=w"(result)
10029 : "w"(a), "x"(b)
10030 : /* No clobbers */);
10031 return result;
10034 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10035 vqdmulh_n_s32 (int32x2_t a, int32_t b)
10037 int32x2_t result;
10038 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
10039 : "=w"(result)
10040 : "w"(a), "w"(b)
10041 : /* No clobbers */);
10042 return result;
10045 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10046 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
10048 int16x8_t result;
10049 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
10050 : "=w"(result)
10051 : "w"(a), "x"(b)
10052 : /* No clobbers */);
10053 return result;
10056 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10057 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
10059 int32x4_t result;
10060 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
10061 : "=w"(result)
10062 : "w"(a), "w"(b)
10063 : /* No clobbers */);
10064 return result;
10067 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10068 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
10070 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10071 __asm__ ("sqxtn2 %0.16b, %1.8h"
10072 : "+w"(result)
10073 : "w"(b)
10074 : /* No clobbers */);
10075 return result;
10078 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10079 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
10081 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10082 __asm__ ("sqxtn2 %0.8h, %1.4s"
10083 : "+w"(result)
10084 : "w"(b)
10085 : /* No clobbers */);
10086 return result;
10089 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10090 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
10092 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10093 __asm__ ("sqxtn2 %0.4s, %1.2d"
10094 : "+w"(result)
10095 : "w"(b)
10096 : /* No clobbers */);
10097 return result;
10100 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10101 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
10103 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10104 __asm__ ("uqxtn2 %0.16b, %1.8h"
10105 : "+w"(result)
10106 : "w"(b)
10107 : /* No clobbers */);
10108 return result;
10111 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10112 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
10114 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10115 __asm__ ("uqxtn2 %0.8h, %1.4s"
10116 : "+w"(result)
10117 : "w"(b)
10118 : /* No clobbers */);
10119 return result;
10122 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10123 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
10125 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10126 __asm__ ("uqxtn2 %0.4s, %1.2d"
10127 : "+w"(result)
10128 : "w"(b)
10129 : /* No clobbers */);
10130 return result;
10133 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10134 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
10136 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10137 __asm__ ("sqxtun2 %0.16b, %1.8h"
10138 : "+w"(result)
10139 : "w"(b)
10140 : /* No clobbers */);
10141 return result;
10144 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10145 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
10147 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10148 __asm__ ("sqxtun2 %0.8h, %1.4s"
10149 : "+w"(result)
10150 : "w"(b)
10151 : /* No clobbers */);
10152 return result;
10155 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10156 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
10158 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10159 __asm__ ("sqxtun2 %0.4s, %1.2d"
10160 : "+w"(result)
10161 : "w"(b)
10162 : /* No clobbers */);
10163 return result;
10166 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10167 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
10169 int16x4_t result;
10170 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
10171 : "=w"(result)
10172 : "w"(a), "x"(b)
10173 : /* No clobbers */);
10174 return result;
10177 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10178 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
10180 int32x2_t result;
10181 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
10182 : "=w"(result)
10183 : "w"(a), "w"(b)
10184 : /* No clobbers */);
10185 return result;
10188 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10189 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
10191 int16x8_t result;
10192 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
10193 : "=w"(result)
10194 : "w"(a), "x"(b)
10195 : /* No clobbers */);
10196 return result;
10199 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10200 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
10202 int32x4_t result;
10203 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
10204 : "=w"(result)
10205 : "w"(a), "w"(b)
10206 : /* No clobbers */);
10207 return result;
/* Saturating rounded shift-right-narrow of B by immediate C into the
   high half; low half comes from A.  Macros because C must be an
   assemble-time immediate ("i" constraint).  */

#define vqrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t __b = (b); \
       int8x8_t __a = (a); \
       int8x16_t __result = vcombine_s8 \
                              (__a, vcreate_s8 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t __b = (b); \
       int16x4_t __a = (a); \
       int16x8_t __result = vcombine_s16 \
                              (__a, vcreate_s16 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t __b = (b); \
       int32x2_t __a = (a); \
       int32x4_t __result = vcombine_s32 \
                              (__a, vcreate_s32 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t __b = (b); \
       uint8x8_t __a = (a); \
       uint8x16_t __result = vcombine_u8 \
                               (__a, vcreate_u8 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t __b = (b); \
       uint16x4_t __a = (a); \
       uint16x8_t __result = vcombine_u16 \
                               (__a, vcreate_u16 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t __b = (b); \
       uint32x2_t __a = (a); \
       uint32x4_t __result = vcombine_u32 \
                               (__a, vcreate_u32 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })
/* Signed-to-unsigned saturating rounded shift-right-narrow into the
   high half (SQRSHRUN2); low half comes from A.  */

#define vqrshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t __b = (b); \
       uint8x8_t __a = (a); \
       uint8x16_t __result = vcombine_u8 \
                               (__a, vcreate_u8 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqrshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t __b = (b); \
       uint16x4_t __a = (a); \
       uint16x8_t __result = vcombine_u16 \
                               (__a, vcreate_u16 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqrshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t __b = (b); \
       uint32x2_t __a = (a); \
       uint32x4_t __result = vcombine_u32 \
                               (__a, vcreate_u32 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })
/* Saturating (non-rounding) shift-right-narrow of B by immediate C
   into the high half; low half comes from A.  */

#define vqshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t __b = (b); \
       int8x8_t __a = (a); \
       int8x16_t __result = vcombine_s8 \
                              (__a, vcreate_s8 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t __b = (b); \
       int16x4_t __a = (a); \
       int16x8_t __result = vcombine_s16 \
                              (__a, vcreate_s16 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t __b = (b); \
       int32x2_t __a = (a); \
       int32x4_t __result = vcombine_s32 \
                              (__a, vcreate_s32 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t __b = (b); \
       uint8x8_t __a = (a); \
       uint8x16_t __result = vcombine_u8 \
                               (__a, vcreate_u8 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t __b = (b); \
       uint16x4_t __a = (a); \
       uint16x8_t __result = vcombine_u16 \
                               (__a, vcreate_u16 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t __b = (b); \
       uint32x2_t __a = (a); \
       uint32x4_t __result = vcombine_u32 \
                               (__a, vcreate_u32 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })
/* Signed-to-unsigned saturating shift-right-narrow into the high half
   (SQSHRUN2); low half comes from A.  */

#define vqshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t __b = (b); \
       uint8x8_t __a = (a); \
       uint8x16_t __result = vcombine_u8 \
                               (__a, vcreate_u8 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t __b = (b); \
       uint16x4_t __a = (a); \
       uint16x8_t __result = vcombine_u16 \
                               (__a, vcreate_u16 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vqshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t __b = (b); \
       uint32x2_t __a = (a); \
       uint32x4_t __result = vcombine_u32 \
                               (__a, vcreate_u32 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })
10480 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10481 vrbit_s8 (int8x8_t a)
10483 int8x8_t result;
10484 __asm__ ("rbit %0.8b,%1.8b"
10485 : "=w"(result)
10486 : "w"(a)
10487 : /* No clobbers */);
10488 return result;
10491 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10492 vrbit_u8 (uint8x8_t a)
10494 uint8x8_t result;
10495 __asm__ ("rbit %0.8b,%1.8b"
10496 : "=w"(result)
10497 : "w"(a)
10498 : /* No clobbers */);
10499 return result;
10502 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10503 vrbitq_s8 (int8x16_t a)
10505 int8x16_t result;
10506 __asm__ ("rbit %0.16b,%1.16b"
10507 : "=w"(result)
10508 : "w"(a)
10509 : /* No clobbers */);
10510 return result;
10513 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10514 vrbitq_u8 (uint8x16_t a)
10516 uint8x16_t result;
10517 __asm__ ("rbit %0.16b,%1.16b"
10518 : "=w"(result)
10519 : "w"(a)
10520 : /* No clobbers */);
10521 return result;
10524 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10525 vrecpe_u32 (uint32x2_t a)
10527 uint32x2_t result;
10528 __asm__ ("urecpe %0.2s,%1.2s"
10529 : "=w"(result)
10530 : "w"(a)
10531 : /* No clobbers */);
10532 return result;
10535 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10536 vrecpeq_u32 (uint32x4_t a)
10538 uint32x4_t result;
10539 __asm__ ("urecpe %0.4s,%1.4s"
10540 : "=w"(result)
10541 : "w"(a)
10542 : /* No clobbers */);
10543 return result;
/* Rounding shift-right-narrow of B by immediate C into the high half
   (RSHRN2); low half comes from A.  */

#define vrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t __b = (b); \
       int8x8_t __a = (a); \
       int8x16_t __result = vcombine_s8 \
                              (__a, vcreate_s8 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t __b = (b); \
       int16x4_t __a = (a); \
       int16x8_t __result = vcombine_s16 \
                              (__a, vcreate_s16 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t __b = (b); \
       int32x2_t __a = (a); \
       int32x4_t __result = vcombine_s32 \
                              (__a, vcreate_s32 \
                                      (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t __b = (b); \
       uint8x8_t __a = (a); \
       uint8x16_t __result = vcombine_u8 \
                               (__a, vcreate_u8 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t __b = (b); \
       uint16x4_t __a = (a); \
       uint16x8_t __result = vcombine_u16 \
                               (__a, vcreate_u16 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t __b = (b); \
       uint32x2_t __a = (a); \
       uint32x4_t __result = vcombine_u32 \
                               (__a, vcreate_u32 \
                                       (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w" (__result) \
                : "w" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })
/* Rounding shift-right-narrow of A by immediate B (RSHRN).  */

#define vrshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t __a = (a); \
       int8x8_t __result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w" (__result) \
                : "w" (__a), "i" (b) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t __a = (a); \
       int16x4_t __result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w" (__result) \
                : "w" (__a), "i" (b) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t __a = (a); \
       int32x2_t __result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w" (__result) \
                : "w" (__a), "i" (b) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t __a = (a); \
       uint8x8_t __result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w" (__result) \
                : "w" (__a), "i" (b) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t __a = (a); \
       uint16x4_t __result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w" (__result) \
                : "w" (__a), "i" (b) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vrshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t __a = (a); \
       uint32x2_t __result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w" (__result) \
                : "w" (__a), "i" (b) \
                : /* No clobbers.  */); \
       __result; \
     })
10708 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10709 vrsqrte_f32 (float32x2_t a)
10711 float32x2_t result;
10712 __asm__ ("frsqrte %0.2s,%1.2s"
10713 : "=w"(result)
10714 : "w"(a)
10715 : /* No clobbers */);
10716 return result;
10719 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
10720 vrsqrte_f64 (float64x1_t a)
10722 float64x1_t result;
10723 __asm__ ("frsqrte %d0,%d1"
10724 : "=w"(result)
10725 : "w"(a)
10726 : /* No clobbers */);
10727 return result;
10730 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10731 vrsqrte_u32 (uint32x2_t a)
10733 uint32x2_t result;
10734 __asm__ ("ursqrte %0.2s,%1.2s"
10735 : "=w"(result)
10736 : "w"(a)
10737 : /* No clobbers */);
10738 return result;
10741 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10742 vrsqrted_f64 (float64_t a)
10744 float64_t result;
10745 __asm__ ("frsqrte %d0,%d1"
10746 : "=w"(result)
10747 : "w"(a)
10748 : /* No clobbers */);
10749 return result;
10752 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10753 vrsqrteq_f32 (float32x4_t a)
10755 float32x4_t result;
10756 __asm__ ("frsqrte %0.4s,%1.4s"
10757 : "=w"(result)
10758 : "w"(a)
10759 : /* No clobbers */);
10760 return result;
10763 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10764 vrsqrteq_f64 (float64x2_t a)
10766 float64x2_t result;
10767 __asm__ ("frsqrte %0.2d,%1.2d"
10768 : "=w"(result)
10769 : "w"(a)
10770 : /* No clobbers */);
10771 return result;
10774 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10775 vrsqrteq_u32 (uint32x4_t a)
10777 uint32x4_t result;
10778 __asm__ ("ursqrte %0.4s,%1.4s"
10779 : "=w"(result)
10780 : "w"(a)
10781 : /* No clobbers */);
10782 return result;
10785 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10786 vrsqrtes_f32 (float32_t a)
10788 float32_t result;
10789 __asm__ ("frsqrte %s0,%s1"
10790 : "=w"(result)
10791 : "w"(a)
10792 : /* No clobbers */);
10793 return result;
10796 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10797 vrsqrts_f32 (float32x2_t a, float32x2_t b)
10799 float32x2_t result;
10800 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
10801 : "=w"(result)
10802 : "w"(a), "w"(b)
10803 : /* No clobbers */);
10804 return result;
10807 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10808 vrsqrtsd_f64 (float64_t a, float64_t b)
10810 float64_t result;
10811 __asm__ ("frsqrts %d0,%d1,%d2"
10812 : "=w"(result)
10813 : "w"(a), "w"(b)
10814 : /* No clobbers */);
10815 return result;
10818 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10819 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
10821 float32x4_t result;
10822 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
10823 : "=w"(result)
10824 : "w"(a), "w"(b)
10825 : /* No clobbers */);
10826 return result;
10829 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10830 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
10832 float64x2_t result;
10833 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
10834 : "=w"(result)
10835 : "w"(a), "w"(b)
10836 : /* No clobbers */);
10837 return result;
10840 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10841 vrsqrtss_f32 (float32_t a, float32_t b)
10843 float32_t result;
10844 __asm__ ("frsqrts %s0,%s1,%s2"
10845 : "=w"(result)
10846 : "w"(a), "w"(b)
10847 : /* No clobbers */);
10848 return result;
10851 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10852 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
10854 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10855 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
10856 : "+w"(result)
10857 : "w"(b), "w"(c)
10858 : /* No clobbers */);
10859 return result;
10862 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10863 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
10865 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10866 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
10867 : "+w"(result)
10868 : "w"(b), "w"(c)
10869 : /* No clobbers */);
10870 return result;
10873 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10874 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
10876 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10877 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
10878 : "+w"(result)
10879 : "w"(b), "w"(c)
10880 : /* No clobbers */);
10881 return result;
10884 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10885 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
10887 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10888 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
10889 : "+w"(result)
10890 : "w"(b), "w"(c)
10891 : /* No clobbers */);
10892 return result;
10895 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10896 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
10898 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10899 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
10900 : "+w"(result)
10901 : "w"(b), "w"(c)
10902 : /* No clobbers */);
10903 return result;
10906 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10907 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
10909 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10910 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
10911 : "+w"(result)
10912 : "w"(b), "w"(c)
10913 : /* No clobbers */);
10914 return result;
10917 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10918 vrsubhn_s16 (int16x8_t a, int16x8_t b)
10920 int8x8_t result;
10921 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
10922 : "=w"(result)
10923 : "w"(a), "w"(b)
10924 : /* No clobbers */);
10925 return result;
10928 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10929 vrsubhn_s32 (int32x4_t a, int32x4_t b)
10931 int16x4_t result;
10932 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
10933 : "=w"(result)
10934 : "w"(a), "w"(b)
10935 : /* No clobbers */);
10936 return result;
10939 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10940 vrsubhn_s64 (int64x2_t a, int64x2_t b)
10942 int32x2_t result;
10943 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
10944 : "=w"(result)
10945 : "w"(a), "w"(b)
10946 : /* No clobbers */);
10947 return result;
10950 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10951 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
10953 uint8x8_t result;
10954 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
10955 : "=w"(result)
10956 : "w"(a), "w"(b)
10957 : /* No clobbers */);
10958 return result;
10961 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10962 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
10964 uint16x4_t result;
10965 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
10966 : "=w"(result)
10967 : "w"(a), "w"(b)
10968 : /* No clobbers */);
10969 return result;
10972 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10973 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
10975 uint32x2_t result;
10976 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
10977 : "=w"(result)
10978 : "w"(a), "w"(b)
10979 : /* No clobbers */);
10980 return result;
/* Insert scalar A into lane C of vector B (INS from a general
   register).  Macros because the lane index C must be an immediate.
   The scalar is passed in a general register ("r") and moved into the
   lane with INS.  */

#define vset_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t __b = (b); \
       float32_t __a = (a); \
       float32x2_t __result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t __b = (b); \
       float64_t __a = (a); \
       float64x1_t __result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t __b = (b); \
       poly8_t __a = (a); \
       poly8x8_t __result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t __b = (b); \
       poly16_t __a = (a); \
       poly16x4_t __result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t __b = (b); \
       int8_t __a = (a); \
       int8x8_t __result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t __b = (b); \
       int16_t __a = (a); \
       int16x4_t __result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t __b = (b); \
       int32_t __a = (a); \
       int32x2_t __result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t __b = (b); \
       int64_t __a = (a); \
       int64x1_t __result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t __b = (b); \
       uint8_t __a = (a); \
       uint8x8_t __result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t __b = (b); \
       uint16_t __a = (a); \
       uint16x4_t __result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t __b = (b); \
       uint32_t __a = (a); \
       uint32x2_t __result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })

#define vset_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t __b = (b); \
       uint64_t __a = (a); \
       uint64x1_t __result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w" (__result) \
                : "r" (__a), "0" (__b), "i" (c) \
                : /* No clobbers.  */); \
       __result; \
     })
11139 #define vsetq_lane_f32(a, b, c) \
11140 __extension__ \
11141 ({ \
11142 float32x4_t b_ = (b); \
11143 float32_t a_ = (a); \
11144 float32x4_t result; \
11145 __asm__ ("ins %0.s[%3], %w1" \
11146 : "=w"(result) \
11147 : "r"(a_), "0"(b_), "i"(c) \
11148 : /* No clobbers */); \
11149 result; \
11152 #define vsetq_lane_f64(a, b, c) \
11153 __extension__ \
11154 ({ \
11155 float64x2_t b_ = (b); \
11156 float64_t a_ = (a); \
11157 float64x2_t result; \
11158 __asm__ ("ins %0.d[%3], %x1" \
11159 : "=w"(result) \
11160 : "r"(a_), "0"(b_), "i"(c) \
11161 : /* No clobbers */); \
11162 result; \
11165 #define vsetq_lane_p8(a, b, c) \
11166 __extension__ \
11167 ({ \
11168 poly8x16_t b_ = (b); \
11169 poly8_t a_ = (a); \
11170 poly8x16_t result; \
11171 __asm__ ("ins %0.b[%3], %w1" \
11172 : "=w"(result) \
11173 : "r"(a_), "0"(b_), "i"(c) \
11174 : /* No clobbers */); \
11175 result; \
11178 #define vsetq_lane_p16(a, b, c) \
11179 __extension__ \
11180 ({ \
11181 poly16x8_t b_ = (b); \
11182 poly16_t a_ = (a); \
11183 poly16x8_t result; \
11184 __asm__ ("ins %0.h[%3], %w1" \
11185 : "=w"(result) \
11186 : "r"(a_), "0"(b_), "i"(c) \
11187 : /* No clobbers */); \
11188 result; \
11191 #define vsetq_lane_s8(a, b, c) \
11192 __extension__ \
11193 ({ \
11194 int8x16_t b_ = (b); \
11195 int8_t a_ = (a); \
11196 int8x16_t result; \
11197 __asm__ ("ins %0.b[%3], %w1" \
11198 : "=w"(result) \
11199 : "r"(a_), "0"(b_), "i"(c) \
11200 : /* No clobbers */); \
11201 result; \
11204 #define vsetq_lane_s16(a, b, c) \
11205 __extension__ \
11206 ({ \
11207 int16x8_t b_ = (b); \
11208 int16_t a_ = (a); \
11209 int16x8_t result; \
11210 __asm__ ("ins %0.h[%3], %w1" \
11211 : "=w"(result) \
11212 : "r"(a_), "0"(b_), "i"(c) \
11213 : /* No clobbers */); \
11214 result; \
11217 #define vsetq_lane_s32(a, b, c) \
11218 __extension__ \
11219 ({ \
11220 int32x4_t b_ = (b); \
11221 int32_t a_ = (a); \
11222 int32x4_t result; \
11223 __asm__ ("ins %0.s[%3], %w1" \
11224 : "=w"(result) \
11225 : "r"(a_), "0"(b_), "i"(c) \
11226 : /* No clobbers */); \
11227 result; \
11230 #define vsetq_lane_s64(a, b, c) \
11231 __extension__ \
11232 ({ \
11233 int64x2_t b_ = (b); \
11234 int64_t a_ = (a); \
11235 int64x2_t result; \
11236 __asm__ ("ins %0.d[%3], %x1" \
11237 : "=w"(result) \
11238 : "r"(a_), "0"(b_), "i"(c) \
11239 : /* No clobbers */); \
11240 result; \
11243 #define vsetq_lane_u8(a, b, c) \
11244 __extension__ \
11245 ({ \
11246 uint8x16_t b_ = (b); \
11247 uint8_t a_ = (a); \
11248 uint8x16_t result; \
11249 __asm__ ("ins %0.b[%3], %w1" \
11250 : "=w"(result) \
11251 : "r"(a_), "0"(b_), "i"(c) \
11252 : /* No clobbers */); \
11253 result; \
11256 #define vsetq_lane_u16(a, b, c) \
11257 __extension__ \
11258 ({ \
11259 uint16x8_t b_ = (b); \
11260 uint16_t a_ = (a); \
11261 uint16x8_t result; \
11262 __asm__ ("ins %0.h[%3], %w1" \
11263 : "=w"(result) \
11264 : "r"(a_), "0"(b_), "i"(c) \
11265 : /* No clobbers */); \
11266 result; \
11269 #define vsetq_lane_u32(a, b, c) \
11270 __extension__ \
11271 ({ \
11272 uint32x4_t b_ = (b); \
11273 uint32_t a_ = (a); \
11274 uint32x4_t result; \
11275 __asm__ ("ins %0.s[%3], %w1" \
11276 : "=w"(result) \
11277 : "r"(a_), "0"(b_), "i"(c) \
11278 : /* No clobbers */); \
11279 result; \
11282 #define vsetq_lane_u64(a, b, c) \
11283 __extension__ \
11284 ({ \
11285 uint64x2_t b_ = (b); \
11286 uint64_t a_ = (a); \
11287 uint64x2_t result; \
11288 __asm__ ("ins %0.d[%3], %x1" \
11289 : "=w"(result) \
11290 : "r"(a_), "0"(b_), "i"(c) \
11291 : /* No clobbers */); \
11292 result; \
11295 #define vshrn_high_n_s16(a, b, c) \
11296 __extension__ \
11297 ({ \
11298 int16x8_t b_ = (b); \
11299 int8x8_t a_ = (a); \
11300 int8x16_t result = vcombine_s8 \
11301 (a_, vcreate_s8 \
11302 (__AARCH64_UINT64_C (0x0))); \
11303 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11304 : "+w"(result) \
11305 : "w"(b_), "i"(c) \
11306 : /* No clobbers */); \
11307 result; \
11310 #define vshrn_high_n_s32(a, b, c) \
11311 __extension__ \
11312 ({ \
11313 int32x4_t b_ = (b); \
11314 int16x4_t a_ = (a); \
11315 int16x8_t result = vcombine_s16 \
11316 (a_, vcreate_s16 \
11317 (__AARCH64_UINT64_C (0x0))); \
11318 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11319 : "+w"(result) \
11320 : "w"(b_), "i"(c) \
11321 : /* No clobbers */); \
11322 result; \
11325 #define vshrn_high_n_s64(a, b, c) \
11326 __extension__ \
11327 ({ \
11328 int64x2_t b_ = (b); \
11329 int32x2_t a_ = (a); \
11330 int32x4_t result = vcombine_s32 \
11331 (a_, vcreate_s32 \
11332 (__AARCH64_UINT64_C (0x0))); \
11333 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11334 : "+w"(result) \
11335 : "w"(b_), "i"(c) \
11336 : /* No clobbers */); \
11337 result; \
11340 #define vshrn_high_n_u16(a, b, c) \
11341 __extension__ \
11342 ({ \
11343 uint16x8_t b_ = (b); \
11344 uint8x8_t a_ = (a); \
11345 uint8x16_t result = vcombine_u8 \
11346 (a_, vcreate_u8 \
11347 (__AARCH64_UINT64_C (0x0))); \
11348 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11349 : "+w"(result) \
11350 : "w"(b_), "i"(c) \
11351 : /* No clobbers */); \
11352 result; \
11355 #define vshrn_high_n_u32(a, b, c) \
11356 __extension__ \
11357 ({ \
11358 uint32x4_t b_ = (b); \
11359 uint16x4_t a_ = (a); \
11360 uint16x8_t result = vcombine_u16 \
11361 (a_, vcreate_u16 \
11362 (__AARCH64_UINT64_C (0x0))); \
11363 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11364 : "+w"(result) \
11365 : "w"(b_), "i"(c) \
11366 : /* No clobbers */); \
11367 result; \
11370 #define vshrn_high_n_u64(a, b, c) \
11371 __extension__ \
11372 ({ \
11373 uint64x2_t b_ = (b); \
11374 uint32x2_t a_ = (a); \
11375 uint32x4_t result = vcombine_u32 \
11376 (a_, vcreate_u32 \
11377 (__AARCH64_UINT64_C (0x0))); \
11378 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11379 : "+w"(result) \
11380 : "w"(b_), "i"(c) \
11381 : /* No clobbers */); \
11382 result; \
11385 #define vshrn_n_s16(a, b) \
11386 __extension__ \
11387 ({ \
11388 int16x8_t a_ = (a); \
11389 int8x8_t result; \
11390 __asm__ ("shrn %0.8b,%1.8h,%2" \
11391 : "=w"(result) \
11392 : "w"(a_), "i"(b) \
11393 : /* No clobbers */); \
11394 result; \
11397 #define vshrn_n_s32(a, b) \
11398 __extension__ \
11399 ({ \
11400 int32x4_t a_ = (a); \
11401 int16x4_t result; \
11402 __asm__ ("shrn %0.4h,%1.4s,%2" \
11403 : "=w"(result) \
11404 : "w"(a_), "i"(b) \
11405 : /* No clobbers */); \
11406 result; \
11409 #define vshrn_n_s64(a, b) \
11410 __extension__ \
11411 ({ \
11412 int64x2_t a_ = (a); \
11413 int32x2_t result; \
11414 __asm__ ("shrn %0.2s,%1.2d,%2" \
11415 : "=w"(result) \
11416 : "w"(a_), "i"(b) \
11417 : /* No clobbers */); \
11418 result; \
11421 #define vshrn_n_u16(a, b) \
11422 __extension__ \
11423 ({ \
11424 uint16x8_t a_ = (a); \
11425 uint8x8_t result; \
11426 __asm__ ("shrn %0.8b,%1.8h,%2" \
11427 : "=w"(result) \
11428 : "w"(a_), "i"(b) \
11429 : /* No clobbers */); \
11430 result; \
11433 #define vshrn_n_u32(a, b) \
11434 __extension__ \
11435 ({ \
11436 uint32x4_t a_ = (a); \
11437 uint16x4_t result; \
11438 __asm__ ("shrn %0.4h,%1.4s,%2" \
11439 : "=w"(result) \
11440 : "w"(a_), "i"(b) \
11441 : /* No clobbers */); \
11442 result; \
11445 #define vshrn_n_u64(a, b) \
11446 __extension__ \
11447 ({ \
11448 uint64x2_t a_ = (a); \
11449 uint32x2_t result; \
11450 __asm__ ("shrn %0.2s,%1.2d,%2" \
11451 : "=w"(result) \
11452 : "w"(a_), "i"(b) \
11453 : /* No clobbers */); \
11454 result; \
11457 #define vsli_n_p8(a, b, c) \
11458 __extension__ \
11459 ({ \
11460 poly8x8_t b_ = (b); \
11461 poly8x8_t a_ = (a); \
11462 poly8x8_t result; \
11463 __asm__ ("sli %0.8b,%2.8b,%3" \
11464 : "=w"(result) \
11465 : "0"(a_), "w"(b_), "i"(c) \
11466 : /* No clobbers */); \
11467 result; \
11470 #define vsli_n_p16(a, b, c) \
11471 __extension__ \
11472 ({ \
11473 poly16x4_t b_ = (b); \
11474 poly16x4_t a_ = (a); \
11475 poly16x4_t result; \
11476 __asm__ ("sli %0.4h,%2.4h,%3" \
11477 : "=w"(result) \
11478 : "0"(a_), "w"(b_), "i"(c) \
11479 : /* No clobbers */); \
11480 result; \
11483 #define vsliq_n_p8(a, b, c) \
11484 __extension__ \
11485 ({ \
11486 poly8x16_t b_ = (b); \
11487 poly8x16_t a_ = (a); \
11488 poly8x16_t result; \
11489 __asm__ ("sli %0.16b,%2.16b,%3" \
11490 : "=w"(result) \
11491 : "0"(a_), "w"(b_), "i"(c) \
11492 : /* No clobbers */); \
11493 result; \
11496 #define vsliq_n_p16(a, b, c) \
11497 __extension__ \
11498 ({ \
11499 poly16x8_t b_ = (b); \
11500 poly16x8_t a_ = (a); \
11501 poly16x8_t result; \
11502 __asm__ ("sli %0.8h,%2.8h,%3" \
11503 : "=w"(result) \
11504 : "0"(a_), "w"(b_), "i"(c) \
11505 : /* No clobbers */); \
11506 result; \
11509 #define vsri_n_p8(a, b, c) \
11510 __extension__ \
11511 ({ \
11512 poly8x8_t b_ = (b); \
11513 poly8x8_t a_ = (a); \
11514 poly8x8_t result; \
11515 __asm__ ("sri %0.8b,%2.8b,%3" \
11516 : "=w"(result) \
11517 : "0"(a_), "w"(b_), "i"(c) \
11518 : /* No clobbers */); \
11519 result; \
11522 #define vsri_n_p16(a, b, c) \
11523 __extension__ \
11524 ({ \
11525 poly16x4_t b_ = (b); \
11526 poly16x4_t a_ = (a); \
11527 poly16x4_t result; \
11528 __asm__ ("sri %0.4h,%2.4h,%3" \
11529 : "=w"(result) \
11530 : "0"(a_), "w"(b_), "i"(c) \
11531 : /* No clobbers */); \
11532 result; \
11535 #define vsriq_n_p8(a, b, c) \
11536 __extension__ \
11537 ({ \
11538 poly8x16_t b_ = (b); \
11539 poly8x16_t a_ = (a); \
11540 poly8x16_t result; \
11541 __asm__ ("sri %0.16b,%2.16b,%3" \
11542 : "=w"(result) \
11543 : "0"(a_), "w"(b_), "i"(c) \
11544 : /* No clobbers */); \
11545 result; \
11548 #define vsriq_n_p16(a, b, c) \
11549 __extension__ \
11550 ({ \
11551 poly16x8_t b_ = (b); \
11552 poly16x8_t a_ = (a); \
11553 poly16x8_t result; \
11554 __asm__ ("sri %0.8h,%2.8h,%3" \
11555 : "=w"(result) \
11556 : "0"(a_), "w"(b_), "i"(c) \
11557 : /* No clobbers */); \
11558 result; \
11561 #define vst1_lane_f32(a, b, c) \
11562 __extension__ \
11563 ({ \
11564 float32x2_t b_ = (b); \
11565 float32_t * a_ = (a); \
11566 __asm__ ("st1 {%1.s}[%2],[%0]" \
11568 : "r"(a_), "w"(b_), "i"(c) \
11569 : "memory"); \
11572 #define vst1_lane_f64(a, b, c) \
11573 __extension__ \
11574 ({ \
11575 float64x1_t b_ = (b); \
11576 float64_t * a_ = (a); \
11577 __asm__ ("st1 {%1.d}[%2],[%0]" \
11579 : "r"(a_), "w"(b_), "i"(c) \
11580 : "memory"); \
11583 #define vst1_lane_p8(a, b, c) \
11584 __extension__ \
11585 ({ \
11586 poly8x8_t b_ = (b); \
11587 poly8_t * a_ = (a); \
11588 __asm__ ("st1 {%1.b}[%2],[%0]" \
11590 : "r"(a_), "w"(b_), "i"(c) \
11591 : "memory"); \
11594 #define vst1_lane_p16(a, b, c) \
11595 __extension__ \
11596 ({ \
11597 poly16x4_t b_ = (b); \
11598 poly16_t * a_ = (a); \
11599 __asm__ ("st1 {%1.h}[%2],[%0]" \
11601 : "r"(a_), "w"(b_), "i"(c) \
11602 : "memory"); \
11605 #define vst1_lane_s8(a, b, c) \
11606 __extension__ \
11607 ({ \
11608 int8x8_t b_ = (b); \
11609 int8_t * a_ = (a); \
11610 __asm__ ("st1 {%1.b}[%2],[%0]" \
11612 : "r"(a_), "w"(b_), "i"(c) \
11613 : "memory"); \
11616 #define vst1_lane_s16(a, b, c) \
11617 __extension__ \
11618 ({ \
11619 int16x4_t b_ = (b); \
11620 int16_t * a_ = (a); \
11621 __asm__ ("st1 {%1.h}[%2],[%0]" \
11623 : "r"(a_), "w"(b_), "i"(c) \
11624 : "memory"); \
11627 #define vst1_lane_s32(a, b, c) \
11628 __extension__ \
11629 ({ \
11630 int32x2_t b_ = (b); \
11631 int32_t * a_ = (a); \
11632 __asm__ ("st1 {%1.s}[%2],[%0]" \
11634 : "r"(a_), "w"(b_), "i"(c) \
11635 : "memory"); \
11638 #define vst1_lane_s64(a, b, c) \
11639 __extension__ \
11640 ({ \
11641 int64x1_t b_ = (b); \
11642 int64_t * a_ = (a); \
11643 __asm__ ("st1 {%1.d}[%2],[%0]" \
11645 : "r"(a_), "w"(b_), "i"(c) \
11646 : "memory"); \
11649 #define vst1_lane_u8(a, b, c) \
11650 __extension__ \
11651 ({ \
11652 uint8x8_t b_ = (b); \
11653 uint8_t * a_ = (a); \
11654 __asm__ ("st1 {%1.b}[%2],[%0]" \
11656 : "r"(a_), "w"(b_), "i"(c) \
11657 : "memory"); \
11660 #define vst1_lane_u16(a, b, c) \
11661 __extension__ \
11662 ({ \
11663 uint16x4_t b_ = (b); \
11664 uint16_t * a_ = (a); \
11665 __asm__ ("st1 {%1.h}[%2],[%0]" \
11667 : "r"(a_), "w"(b_), "i"(c) \
11668 : "memory"); \
11671 #define vst1_lane_u32(a, b, c) \
11672 __extension__ \
11673 ({ \
11674 uint32x2_t b_ = (b); \
11675 uint32_t * a_ = (a); \
11676 __asm__ ("st1 {%1.s}[%2],[%0]" \
11678 : "r"(a_), "w"(b_), "i"(c) \
11679 : "memory"); \
11682 #define vst1_lane_u64(a, b, c) \
11683 __extension__ \
11684 ({ \
11685 uint64x1_t b_ = (b); \
11686 uint64_t * a_ = (a); \
11687 __asm__ ("st1 {%1.d}[%2],[%0]" \
11689 : "r"(a_), "w"(b_), "i"(c) \
11690 : "memory"); \
11694 #define vst1q_lane_f32(a, b, c) \
11695 __extension__ \
11696 ({ \
11697 float32x4_t b_ = (b); \
11698 float32_t * a_ = (a); \
11699 __asm__ ("st1 {%1.s}[%2],[%0]" \
11701 : "r"(a_), "w"(b_), "i"(c) \
11702 : "memory"); \
11705 #define vst1q_lane_f64(a, b, c) \
11706 __extension__ \
11707 ({ \
11708 float64x2_t b_ = (b); \
11709 float64_t * a_ = (a); \
11710 __asm__ ("st1 {%1.d}[%2],[%0]" \
11712 : "r"(a_), "w"(b_), "i"(c) \
11713 : "memory"); \
11716 #define vst1q_lane_p8(a, b, c) \
11717 __extension__ \
11718 ({ \
11719 poly8x16_t b_ = (b); \
11720 poly8_t * a_ = (a); \
11721 __asm__ ("st1 {%1.b}[%2],[%0]" \
11723 : "r"(a_), "w"(b_), "i"(c) \
11724 : "memory"); \
11727 #define vst1q_lane_p16(a, b, c) \
11728 __extension__ \
11729 ({ \
11730 poly16x8_t b_ = (b); \
11731 poly16_t * a_ = (a); \
11732 __asm__ ("st1 {%1.h}[%2],[%0]" \
11734 : "r"(a_), "w"(b_), "i"(c) \
11735 : "memory"); \
11738 #define vst1q_lane_s8(a, b, c) \
11739 __extension__ \
11740 ({ \
11741 int8x16_t b_ = (b); \
11742 int8_t * a_ = (a); \
11743 __asm__ ("st1 {%1.b}[%2],[%0]" \
11745 : "r"(a_), "w"(b_), "i"(c) \
11746 : "memory"); \
11749 #define vst1q_lane_s16(a, b, c) \
11750 __extension__ \
11751 ({ \
11752 int16x8_t b_ = (b); \
11753 int16_t * a_ = (a); \
11754 __asm__ ("st1 {%1.h}[%2],[%0]" \
11756 : "r"(a_), "w"(b_), "i"(c) \
11757 : "memory"); \
11760 #define vst1q_lane_s32(a, b, c) \
11761 __extension__ \
11762 ({ \
11763 int32x4_t b_ = (b); \
11764 int32_t * a_ = (a); \
11765 __asm__ ("st1 {%1.s}[%2],[%0]" \
11767 : "r"(a_), "w"(b_), "i"(c) \
11768 : "memory"); \
11771 #define vst1q_lane_s64(a, b, c) \
11772 __extension__ \
11773 ({ \
11774 int64x2_t b_ = (b); \
11775 int64_t * a_ = (a); \
11776 __asm__ ("st1 {%1.d}[%2],[%0]" \
11778 : "r"(a_), "w"(b_), "i"(c) \
11779 : "memory"); \
11782 #define vst1q_lane_u8(a, b, c) \
11783 __extension__ \
11784 ({ \
11785 uint8x16_t b_ = (b); \
11786 uint8_t * a_ = (a); \
11787 __asm__ ("st1 {%1.b}[%2],[%0]" \
11789 : "r"(a_), "w"(b_), "i"(c) \
11790 : "memory"); \
11793 #define vst1q_lane_u16(a, b, c) \
11794 __extension__ \
11795 ({ \
11796 uint16x8_t b_ = (b); \
11797 uint16_t * a_ = (a); \
11798 __asm__ ("st1 {%1.h}[%2],[%0]" \
11800 : "r"(a_), "w"(b_), "i"(c) \
11801 : "memory"); \
11804 #define vst1q_lane_u32(a, b, c) \
11805 __extension__ \
11806 ({ \
11807 uint32x4_t b_ = (b); \
11808 uint32_t * a_ = (a); \
11809 __asm__ ("st1 {%1.s}[%2],[%0]" \
11811 : "r"(a_), "w"(b_), "i"(c) \
11812 : "memory"); \
11815 #define vst1q_lane_u64(a, b, c) \
11816 __extension__ \
11817 ({ \
11818 uint64x2_t b_ = (b); \
11819 uint64_t * a_ = (a); \
11820 __asm__ ("st1 {%1.d}[%2],[%0]" \
11822 : "r"(a_), "w"(b_), "i"(c) \
11823 : "memory"); \
11826 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11827 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11829 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11830 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
11831 : "+w"(result)
11832 : "w"(b), "w"(c)
11833 : /* No clobbers */);
11834 return result;
11837 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11838 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11840 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11841 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
11842 : "+w"(result)
11843 : "w"(b), "w"(c)
11844 : /* No clobbers */);
11845 return result;
11848 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11849 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11851 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11852 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
11853 : "+w"(result)
11854 : "w"(b), "w"(c)
11855 : /* No clobbers */);
11856 return result;
11859 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11860 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11862 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11863 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
11864 : "+w"(result)
11865 : "w"(b), "w"(c)
11866 : /* No clobbers */);
11867 return result;
11870 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11871 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11873 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11874 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
11875 : "+w"(result)
11876 : "w"(b), "w"(c)
11877 : /* No clobbers */);
11878 return result;
11881 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11882 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11884 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11885 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
11886 : "+w"(result)
11887 : "w"(b), "w"(c)
11888 : /* No clobbers */);
11889 return result;
11892 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11893 vsubhn_s16 (int16x8_t a, int16x8_t b)
11895 int8x8_t result;
11896 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
11897 : "=w"(result)
11898 : "w"(a), "w"(b)
11899 : /* No clobbers */);
11900 return result;
11903 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11904 vsubhn_s32 (int32x4_t a, int32x4_t b)
11906 int16x4_t result;
11907 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
11908 : "=w"(result)
11909 : "w"(a), "w"(b)
11910 : /* No clobbers */);
11911 return result;
11914 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11915 vsubhn_s64 (int64x2_t a, int64x2_t b)
11917 int32x2_t result;
11918 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
11919 : "=w"(result)
11920 : "w"(a), "w"(b)
11921 : /* No clobbers */);
11922 return result;
11925 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11926 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
11928 uint8x8_t result;
11929 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
11930 : "=w"(result)
11931 : "w"(a), "w"(b)
11932 : /* No clobbers */);
11933 return result;
11936 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11937 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
11939 uint16x4_t result;
11940 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
11941 : "=w"(result)
11942 : "w"(a), "w"(b)
11943 : /* No clobbers */);
11944 return result;
11947 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11948 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
11950 uint32x2_t result;
11951 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
11952 : "=w"(result)
11953 : "w"(a), "w"(b)
11954 : /* No clobbers */);
11955 return result;
11958 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11959 vtst_p8 (poly8x8_t a, poly8x8_t b)
11961 uint8x8_t result;
11962 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
11963 : "=w"(result)
11964 : "w"(a), "w"(b)
11965 : /* No clobbers */);
11966 return result;
11969 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11970 vtst_p16 (poly16x4_t a, poly16x4_t b)
11972 uint16x4_t result;
11973 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
11974 : "=w"(result)
11975 : "w"(a), "w"(b)
11976 : /* No clobbers */);
11977 return result;
11980 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11981 vtstq_p8 (poly8x16_t a, poly8x16_t b)
11983 uint8x16_t result;
11984 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
11985 : "=w"(result)
11986 : "w"(a), "w"(b)
11987 : /* No clobbers */);
11988 return result;
11991 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11992 vtstq_p16 (poly16x8_t a, poly16x8_t b)
11994 uint16x8_t result;
11995 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
11996 : "=w"(result)
11997 : "w"(a), "w"(b)
11998 : /* No clobbers */);
11999 return result;
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   This is required for casting memory reference.  */
12051 #define __STRUCTN(t, sz, nelem) \
12052 typedef struct t ## sz ## x ## nelem ## _t { \
12053 t ## sz ## _t val[nelem]; \
12054 } t ## sz ## x ## nelem ## _t;
12056 /* 2-element structs. */
12057 __STRUCTN (int, 8, 2)
12058 __STRUCTN (int, 16, 2)
12059 __STRUCTN (uint, 8, 2)
12060 __STRUCTN (uint, 16, 2)
12061 __STRUCTN (poly, 8, 2)
12062 __STRUCTN (poly, 16, 2)
12063 /* 3-element structs. */
12064 __STRUCTN (int, 8, 3)
12065 __STRUCTN (int, 16, 3)
12066 __STRUCTN (int, 32, 3)
12067 __STRUCTN (int, 64, 3)
12068 __STRUCTN (uint, 8, 3)
12069 __STRUCTN (uint, 16, 3)
12070 __STRUCTN (uint, 32, 3)
12071 __STRUCTN (uint, 64, 3)
12072 __STRUCTN (float, 32, 3)
12073 __STRUCTN (float, 64, 3)
12074 __STRUCTN (poly, 8, 3)
12075 __STRUCTN (poly, 16, 3)
12076 /* 4-element structs. */
12077 __STRUCTN (int, 8, 4)
12078 __STRUCTN (int, 64, 4)
12079 __STRUCTN (uint, 8, 4)
12080 __STRUCTN (uint, 64, 4)
12081 __STRUCTN (poly, 8, 4)
12082 __STRUCTN (float, 64, 4)
12083 #undef __STRUCTN
12085 #define __LD2R_FUNC(rettype, structtype, ptrtype, \
12086 regsuffix, funcsuffix, Q) \
12087 __extension__ static __inline rettype \
12088 __attribute__ ((__always_inline__)) \
12089 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
12091 rettype result; \
12092 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
12093 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
12094 : "=Q"(result) \
12095 : "Q"(*(const structtype *)ptr) \
12096 : "memory", "v16", "v17"); \
12097 return result; \
12100 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
12101 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
12102 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
12103 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
12104 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
12105 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
12106 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
12107 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
12108 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
12109 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
12110 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
12111 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
12112 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
12113 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
12114 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
12115 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
12116 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
12117 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
12118 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
12119 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
12120 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
12121 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
12122 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
12123 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
12125 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
12126 lnsuffix, funcsuffix, Q) \
12127 __extension__ static __inline rettype \
12128 __attribute__ ((__always_inline__)) \
12129 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
12130 rettype b, const int c) \
12132 rettype result; \
12133 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
12134 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
12135 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
12136 : "=Q"(result) \
12137 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
12138 : "memory", "v16", "v17"); \
12139 return result; \
12142 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
12143 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
12144 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
12145 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
12146 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
12147 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
12148 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
12149 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
12150 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
12151 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
12152 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
12153 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
12154 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
12155 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
12156 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
12157 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
12158 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
12159 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
12160 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
12161 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
12162 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
12163 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
12164 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
12165 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
12167 #define __LD3R_FUNC(rettype, structtype, ptrtype, \
12168 regsuffix, funcsuffix, Q) \
12169 __extension__ static __inline rettype \
12170 __attribute__ ((__always_inline__)) \
12171 vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
12173 rettype result; \
12174 __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
12175 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
12176 : "=Q"(result) \
12177 : "Q"(*(const structtype *)ptr) \
12178 : "memory", "v16", "v17", "v18"); \
12179 return result; \
12182 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
12183 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
12184 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
12185 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
12186 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
12187 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
12188 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
12189 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
12190 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
12191 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
12192 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
12193 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
12194 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
12195 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
12196 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
12197 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
12198 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
12199 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
12200 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
12201 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
12202 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
12203 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
12204 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
12205 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
12207 #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
12208 lnsuffix, funcsuffix, Q) \
12209 __extension__ static __inline rettype \
12210 __attribute__ ((__always_inline__)) \
12211 vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
12212 rettype b, const int c) \
12214 rettype result; \
12215 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
12216 "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
12217 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
12218 : "=Q"(result) \
12219 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
12220 : "memory", "v16", "v17", "v18"); \
12221 return result; \
12224 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
12225 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
12226 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
12227 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
12228 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
12229 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
12230 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
12231 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
12232 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
12233 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
12234 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
12235 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
12236 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
12237 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
12238 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
12239 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
12240 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
12241 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
12242 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
12243 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
12244 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
12245 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
12246 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
12247 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Emit vld4[q]_dup_<funcsuffix>: load one 4-element struct from PTR and
   replicate it across all lanes of a 4-register tuple, via an LD4R into
   fixed registers v16-v19 followed by an ST1 into the result.
   NOTE(review): structtype is only used to size the "%1" memory operand
   for the asm; verify each invocation's structtype covers the bytes LD4R
   actually reads.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype,                       \
                    regsuffix, funcsuffix, Q)                           \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)                 \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"   \
             "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"    \
             : "=Q"(result)                                             \
             : "Q"(*(const structtype *)ptr)                            \
             : "memory", "v16", "v17", "v18", "v19");                   \
    return result;                                                      \
  }
12264 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
12265 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
12266 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
12267 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
12268 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
12269 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
12270 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
12271 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
12272 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
12273 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
12274 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
12275 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
12276 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
12277 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
12278 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
12279 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
12280 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
12281 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
12282 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
12283 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
12284 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
12285 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
12286 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
12287 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* Emit vld4[q]_lane_<funcsuffix>: load the existing tuple B into v16-v19,
   overwrite lane C of each register from PTR with LD4 (lane form), then
   store the registers back into the result.  C must be a compile-time
   constant (asm "i" constraint).  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix,                    \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     rettype b, const int c)            \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"    \
             "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t"  \
             "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"    \
             : "=Q"(result)                                             \
             : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)               \
             : "memory", "v16", "v17", "v18", "v19");                   \
    return result;                                                      \
  }
12306 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
12307 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
12308 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
12309 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
12310 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
12311 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
12312 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
12313 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
12314 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
12315 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
12316 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
12317 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
12318 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
12319 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
12320 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
12321 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
12322 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
12323 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
12324 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
12325 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
12326 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
12327 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
12328 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
12329 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* Emit vst2_lane_<funcsuffix> (64-bit variants): widen each half-width
   vector of B to a full q-register (upper half zeroed via vcombine with a
   zero vcreate), pack the pair into an OI register tuple, and store lane
   C with the st2_lane builtin.  */
#define __ST2_LANE_FUNC(intype, largetype, ptrtype,                       \
                        mode, ptr_mode, funcsuffix, signedtype)           \
__extension__ static __inline void                                        \
__attribute__ ((__always_inline__))                                       \
vst2_lane_ ## funcsuffix (ptrtype *__ptr,                                 \
                          intype __b, const int __c)                      \
{                                                                         \
  __builtin_aarch64_simd_oi __o;                                          \
  largetype __temp;                                                       \
  __temp.val[0]                                                           \
    = vcombine_##funcsuffix (__b.val[0],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]                                                           \
    = vcombine_##funcsuffix (__b.val[1],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregoi##mode (__o,                          \
                                            (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregoi##mode (__o,                          \
                                            (signedtype) __temp.val[1], 1); \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
                                    __ptr, __o, __c);                     \
}
12354 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
12355 float32x4_t)
12356 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
12357 float64x2_t)
12358 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
12359 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
12360 int16x8_t)
12361 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
12362 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
12363 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
12364 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
12365 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
12366 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
12367 int16x8_t)
12368 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
12369 int32x4_t)
12370 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
12371 int64x2_t)
/* Redefine __ST2_LANE_FUNC for the 128-bit vst2q_lane_* variants: the
   input tuple is already q-register sized, so reinterpret it as an OI
   tuple through a union (avoids a strict-aliasing pointer cast) and store
   lane C directly.  */
#undef __ST2_LANE_FUNC
#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)      \
__extension__ static __inline void                                        \
__attribute__ ((__always_inline__))                                       \
vst2q_lane_ ## funcsuffix (ptrtype *__ptr,                                \
                           intype __b, const int __c)                     \
{                                                                         \
  union { intype __i;                                                     \
          __builtin_aarch64_simd_oi __o; } __temp = { __b };              \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
                                    __ptr, __temp.__o, __c);              \
}
12386 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
12387 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
12388 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
12389 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
12390 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
12391 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
12392 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
12393 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
12394 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
12395 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
12396 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
12397 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
/* Emit vst3_lane_<funcsuffix> (64-bit variants): widen the three
   half-width vectors of B (upper halves zeroed), pack them into a CI
   register tuple, and store lane C with the st3_lane builtin.  */
#define __ST3_LANE_FUNC(intype, largetype, ptrtype,                       \
                        mode, ptr_mode, funcsuffix, signedtype)           \
__extension__ static __inline void                                        \
__attribute__ ((__always_inline__))                                       \
vst3_lane_ ## funcsuffix (ptrtype *__ptr,                                 \
                          intype __b, const int __c)                      \
{                                                                         \
  __builtin_aarch64_simd_ci __o;                                          \
  largetype __temp;                                                       \
  __temp.val[0]                                                           \
    = vcombine_##funcsuffix (__b.val[0],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]                                                           \
    = vcombine_##funcsuffix (__b.val[1],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]                                                           \
    = vcombine_##funcsuffix (__b.val[2],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregci##mode (__o,                          \
                                            (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregci##mode (__o,                          \
                                            (signedtype) __temp.val[1], 1); \
  __o = __builtin_aarch64_set_qregci##mode (__o,                          \
                                            (signedtype) __temp.val[2], 2); \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
                                    __ptr, __o, __c);                     \
}
12427 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
12428 float32x4_t)
12429 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
12430 float64x2_t)
12431 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
12432 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
12433 int16x8_t)
12434 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
12435 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
12436 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
12437 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
12438 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
12439 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
12440 int16x8_t)
12441 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
12442 int32x4_t)
12443 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
12444 int64x2_t)
/* Redefine __ST3_LANE_FUNC for the 128-bit vst3q_lane_* variants:
   reinterpret the q-sized tuple as a CI tuple through a union and store
   lane C directly.  */
#undef __ST3_LANE_FUNC
#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)      \
__extension__ static __inline void                                        \
__attribute__ ((__always_inline__))                                       \
vst3q_lane_ ## funcsuffix (ptrtype *__ptr,                                \
                           intype __b, const int __c)                     \
{                                                                         \
  union { intype __i;                                                     \
          __builtin_aarch64_simd_ci __o; } __temp = { __b };              \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
                                    __ptr, __temp.__o, __c);              \
}
12459 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
12460 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
12461 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
12462 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
12463 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
12464 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
12465 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
12466 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
12467 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
12468 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
12469 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
12470 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
/* Emit vst4_lane_<funcsuffix> (64-bit variants): widen the four
   half-width vectors of B (upper halves zeroed), pack them into an XI
   register tuple, and store lane C with the st4_lane builtin.  */
#define __ST4_LANE_FUNC(intype, largetype, ptrtype,                       \
                        mode, ptr_mode, funcsuffix, signedtype)           \
__extension__ static __inline void                                        \
__attribute__ ((__always_inline__))                                       \
vst4_lane_ ## funcsuffix (ptrtype *__ptr,                                 \
                          intype __b, const int __c)                      \
{                                                                         \
  __builtin_aarch64_simd_xi __o;                                          \
  largetype __temp;                                                       \
  __temp.val[0]                                                           \
    = vcombine_##funcsuffix (__b.val[0],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]                                                           \
    = vcombine_##funcsuffix (__b.val[1],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]                                                           \
    = vcombine_##funcsuffix (__b.val[2],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[3]                                                           \
    = vcombine_##funcsuffix (__b.val[3],                                  \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregxi##mode (__o,                          \
                                            (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregxi##mode (__o,                          \
                                            (signedtype) __temp.val[1], 1); \
  __o = __builtin_aarch64_set_qregxi##mode (__o,                          \
                                            (signedtype) __temp.val[2], 2); \
  __o = __builtin_aarch64_set_qregxi##mode (__o,                          \
                                            (signedtype) __temp.val[3], 3); \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
                                    __ptr, __o, __c);                     \
}
12505 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
12506 float32x4_t)
12507 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
12508 float64x2_t)
12509 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
12510 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
12511 int16x8_t)
12512 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
12513 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
12514 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
12515 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
12516 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
12517 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
12518 int16x8_t)
12519 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
12520 int32x4_t)
12521 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
12522 int64x2_t)
/* Redefine __ST4_LANE_FUNC for the 128-bit vst4q_lane_* variants:
   reinterpret the q-sized tuple as an XI tuple through a union and store
   lane C directly.  */
#undef __ST4_LANE_FUNC
#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)      \
__extension__ static __inline void                                        \
__attribute__ ((__always_inline__))                                       \
vst4q_lane_ ## funcsuffix (ptrtype *__ptr,                                \
                           intype __b, const int __c)                     \
{                                                                         \
  union { intype __i;                                                     \
          __builtin_aarch64_simd_xi __o; } __temp = { __b };              \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
                                    __ptr, __temp.__o, __c);              \
}
12537 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
12538 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
12539 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
12540 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
12541 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
12542 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
12543 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
12544 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
12545 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
12546 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
12547 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
12548 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
12550 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
12551 vaddlv_s32 (int32x2_t a)
12553 int64_t result;
12554 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
12555 return result;
12558 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12559 vaddlv_u32 (uint32x2_t a)
12561 uint64_t result;
12562 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
12563 return result;
12566 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
12567 vpaddd_s64 (int64x2_t __a)
12569 return __builtin_aarch64_addpdi (__a);
12572 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12573 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12575 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
12578 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12579 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12581 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
12584 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12585 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12587 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
12590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12591 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12593 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
12596 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12597 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12599 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
12602 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12603 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12605 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
12608 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12609 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12611 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
12614 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12615 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12617 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
12620 /* Table intrinsics. */
12622 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12623 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
12625 poly8x8_t result;
12626 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12627 : "=w"(result)
12628 : "w"(a), "w"(b)
12629 : /* No clobbers */);
12630 return result;
12633 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12634 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
12636 int8x8_t result;
12637 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12638 : "=w"(result)
12639 : "w"(a), "w"(b)
12640 : /* No clobbers */);
12641 return result;
12644 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12645 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
12647 uint8x8_t result;
12648 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12649 : "=w"(result)
12650 : "w"(a), "w"(b)
12651 : /* No clobbers */);
12652 return result;
12655 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12656 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
12658 poly8x16_t result;
12659 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12660 : "=w"(result)
12661 : "w"(a), "w"(b)
12662 : /* No clobbers */);
12663 return result;
12666 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12667 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
12669 int8x16_t result;
12670 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12671 : "=w"(result)
12672 : "w"(a), "w"(b)
12673 : /* No clobbers */);
12674 return result;
12677 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12678 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
12680 uint8x16_t result;
12681 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12682 : "=w"(result)
12683 : "w"(a), "w"(b)
12684 : /* No clobbers */);
12685 return result;
12688 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12689 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
12691 int8x8_t result;
12692 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12693 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12694 :"=w"(result)
12695 :"Q"(tab),"w"(idx)
12696 :"memory", "v16", "v17");
12697 return result;
12700 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12701 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
12703 uint8x8_t result;
12704 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12705 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12706 :"=w"(result)
12707 :"Q"(tab),"w"(idx)
12708 :"memory", "v16", "v17");
12709 return result;
12712 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12713 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
12715 poly8x8_t result;
12716 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12717 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12718 :"=w"(result)
12719 :"Q"(tab),"w"(idx)
12720 :"memory", "v16", "v17");
12721 return result;
12724 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12725 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
12727 int8x16_t result;
12728 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12729 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12730 :"=w"(result)
12731 :"Q"(tab),"w"(idx)
12732 :"memory", "v16", "v17");
12733 return result;
12736 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12737 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
12739 uint8x16_t result;
12740 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12741 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12742 :"=w"(result)
12743 :"Q"(tab),"w"(idx)
12744 :"memory", "v16", "v17");
12745 return result;
12748 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12749 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
12751 poly8x16_t result;
12752 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12753 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12754 :"=w"(result)
12755 :"Q"(tab),"w"(idx)
12756 :"memory", "v16", "v17");
12757 return result;
12760 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12761 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
12763 int8x8_t result;
12764 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12765 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12766 :"=w"(result)
12767 :"Q"(tab),"w"(idx)
12768 :"memory", "v16", "v17", "v18");
12769 return result;
12772 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12773 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
12775 uint8x8_t result;
12776 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12777 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12778 :"=w"(result)
12779 :"Q"(tab),"w"(idx)
12780 :"memory", "v16", "v17", "v18");
12781 return result;
12784 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12785 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
12787 poly8x8_t result;
12788 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12789 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12790 :"=w"(result)
12791 :"Q"(tab),"w"(idx)
12792 :"memory", "v16", "v17", "v18");
12793 return result;
12796 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12797 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
12799 int8x16_t result;
12800 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12801 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12802 :"=w"(result)
12803 :"Q"(tab),"w"(idx)
12804 :"memory", "v16", "v17", "v18");
12805 return result;
12808 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12809 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
12811 uint8x16_t result;
12812 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12813 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12814 :"=w"(result)
12815 :"Q"(tab),"w"(idx)
12816 :"memory", "v16", "v17", "v18");
12817 return result;
12820 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12821 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
12823 poly8x16_t result;
12824 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12825 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12826 :"=w"(result)
12827 :"Q"(tab),"w"(idx)
12828 :"memory", "v16", "v17", "v18");
12829 return result;
12832 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12833 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
12835 int8x8_t result;
12836 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12837 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12838 :"=w"(result)
12839 :"Q"(tab),"w"(idx)
12840 :"memory", "v16", "v17", "v18", "v19");
12841 return result;
12844 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12845 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
12847 uint8x8_t result;
12848 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12849 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12850 :"=w"(result)
12851 :"Q"(tab),"w"(idx)
12852 :"memory", "v16", "v17", "v18", "v19");
12853 return result;
12856 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12857 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
12859 poly8x8_t result;
12860 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12861 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12862 :"=w"(result)
12863 :"Q"(tab),"w"(idx)
12864 :"memory", "v16", "v17", "v18", "v19");
12865 return result;
12869 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12870 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
12872 int8x16_t result;
12873 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12874 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12875 :"=w"(result)
12876 :"Q"(tab),"w"(idx)
12877 :"memory", "v16", "v17", "v18", "v19");
12878 return result;
12881 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12882 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
12884 uint8x16_t result;
12885 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12886 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12887 :"=w"(result)
12888 :"Q"(tab),"w"(idx)
12889 :"memory", "v16", "v17", "v18", "v19");
12890 return result;
12893 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12894 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
12896 poly8x16_t result;
12897 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12898 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12899 :"=w"(result)
12900 :"Q"(tab),"w"(idx)
12901 :"memory", "v16", "v17", "v18", "v19");
12902 return result;
12906 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12907 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
12909 int8x8_t result = r;
12910 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12911 : "+w"(result)
12912 : "w"(tab), "w"(idx)
12913 : /* No clobbers */);
12914 return result;
12917 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12918 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
12920 uint8x8_t result = r;
12921 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12922 : "+w"(result)
12923 : "w"(tab), "w"(idx)
12924 : /* No clobbers */);
12925 return result;
12928 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12929 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
12931 poly8x8_t result = r;
12932 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12933 : "+w"(result)
12934 : "w"(tab), "w"(idx)
12935 : /* No clobbers */);
12936 return result;
12939 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12940 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
12942 int8x16_t result = r;
12943 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12944 : "+w"(result)
12945 : "w"(tab), "w"(idx)
12946 : /* No clobbers */);
12947 return result;
12950 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12951 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
12953 uint8x16_t result = r;
12954 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12955 : "+w"(result)
12956 : "w"(tab), "w"(idx)
12957 : /* No clobbers */);
12958 return result;
12961 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12962 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
12964 poly8x16_t result = r;
12965 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12966 : "+w"(result)
12967 : "w"(tab), "w"(idx)
12968 : /* No clobbers */);
12969 return result;
12972 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12973 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
12975 int8x8_t result = r;
12976 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12977 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12978 :"+w"(result)
12979 :"Q"(tab),"w"(idx)
12980 :"memory", "v16", "v17");
12981 return result;
12984 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12985 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
12987 uint8x8_t result = r;
12988 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12989 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12990 :"+w"(result)
12991 :"Q"(tab),"w"(idx)
12992 :"memory", "v16", "v17");
12993 return result;
12996 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12997 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
12999 poly8x8_t result = r;
13000 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
13001 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
13002 :"+w"(result)
13003 :"Q"(tab),"w"(idx)
13004 :"memory", "v16", "v17");
13005 return result;
13009 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13010 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
13012 int8x16_t result = r;
13013 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
13014 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
13015 :"+w"(result)
13016 :"Q"(tab),"w"(idx)
13017 :"memory", "v16", "v17");
13018 return result;
13021 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13022 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
13024 uint8x16_t result = r;
13025 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
13026 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
13027 :"+w"(result)
13028 :"Q"(tab),"w"(idx)
13029 :"memory", "v16", "v17");
13030 return result;
13033 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13034 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
13036 poly8x16_t result = r;
13037 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
13038 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
13039 :"+w"(result)
13040 :"Q"(tab),"w"(idx)
13041 :"memory", "v16", "v17");
13042 return result;
13046 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13047 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
13049 int8x8_t result = r;
13050 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13051 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
13052 :"+w"(result)
13053 :"Q"(tab),"w"(idx)
13054 :"memory", "v16", "v17", "v18");
13055 return result;
13058 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13059 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
13061 uint8x8_t result = r;
13062 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13063 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
13064 :"+w"(result)
13065 :"Q"(tab),"w"(idx)
13066 :"memory", "v16", "v17", "v18");
13067 return result;
13070 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13071 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
13073 poly8x8_t result = r;
13074 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13075 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
13076 :"+w"(result)
13077 :"Q"(tab),"w"(idx)
13078 :"memory", "v16", "v17", "v18");
13079 return result;
13083 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13084 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
13086 int8x16_t result = r;
13087 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13088 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
13089 :"+w"(result)
13090 :"Q"(tab),"w"(idx)
13091 :"memory", "v16", "v17", "v18");
13092 return result;
13095 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13096 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
13098 uint8x16_t result = r;
13099 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13100 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
13101 :"+w"(result)
13102 :"Q"(tab),"w"(idx)
13103 :"memory", "v16", "v17", "v18");
13104 return result;
13107 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13108 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
13110 poly8x16_t result = r;
13111 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13112 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
13113 :"+w"(result)
13114 :"Q"(tab),"w"(idx)
13115 :"memory", "v16", "v17", "v18");
13116 return result;
13120 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13121 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
13123 int8x8_t result = r;
13124 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13125 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13126 :"+w"(result)
13127 :"Q"(tab),"w"(idx)
13128 :"memory", "v16", "v17", "v18", "v19");
13129 return result;
13132 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13133 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
13135 uint8x8_t result = r;
13136 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13137 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13138 :"+w"(result)
13139 :"Q"(tab),"w"(idx)
13140 :"memory", "v16", "v17", "v18", "v19");
13141 return result;
13144 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13145 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
13147 poly8x8_t result = r;
13148 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13149 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13150 :"+w"(result)
13151 :"Q"(tab),"w"(idx)
13152 :"memory", "v16", "v17", "v18", "v19");
13153 return result;
13157 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13158 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
13160 int8x16_t result = r;
13161 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13162 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13163 :"+w"(result)
13164 :"Q"(tab),"w"(idx)
13165 :"memory", "v16", "v17", "v18", "v19");
13166 return result;
13169 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13170 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
13172 uint8x16_t result = r;
13173 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13174 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13175 :"+w"(result)
13176 :"Q"(tab),"w"(idx)
13177 :"memory", "v16", "v17", "v18", "v19");
13178 return result;
13181 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13182 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
13184 poly8x16_t result = r;
13185 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13186 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13187 :"+w"(result)
13188 :"Q"(tab),"w"(idx)
13189 :"memory", "v16", "v17", "v18", "v19");
13190 return result;
13193 /* V7 legacy table intrinsics. */
13195 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13196 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
13198 int8x8_t result;
13199 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
13200 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13201 : "=w"(result)
13202 : "w"(temp), "w"(idx)
13203 : /* No clobbers */);
13204 return result;
13207 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13208 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
13210 uint8x8_t result;
13211 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
13212 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13213 : "=w"(result)
13214 : "w"(temp), "w"(idx)
13215 : /* No clobbers */);
13216 return result;
13219 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13220 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
13222 poly8x8_t result;
13223 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
13224 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13225 : "=w"(result)
13226 : "w"(temp), "w"(idx)
13227 : /* No clobbers */);
13228 return result;
13231 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13232 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
13234 int8x8_t result;
13235 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
13236 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13237 : "=w"(result)
13238 : "w"(temp), "w"(idx)
13239 : /* No clobbers */);
13240 return result;
13243 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13244 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
13246 uint8x8_t result;
13247 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
13248 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13249 : "=w"(result)
13250 : "w"(temp), "w"(idx)
13251 : /* No clobbers */);
13252 return result;
13255 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13256 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
13258 poly8x8_t result;
13259 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
13260 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13261 : "=w"(result)
13262 : "w"(temp), "w"(idx)
13263 : /* No clobbers */);
13264 return result;
13267 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13268 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
13270 int8x8_t result;
13271 int8x16x2_t temp;
13272 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13273 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
13274 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13275 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13276 : "=w"(result)
13277 : "Q"(temp), "w"(idx)
13278 : "v16", "v17", "memory");
13279 return result;
13282 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13283 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
13285 uint8x8_t result;
13286 uint8x16x2_t temp;
13287 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13288 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
13289 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13290 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13291 : "=w"(result)
13292 : "Q"(temp), "w"(idx)
13293 : "v16", "v17", "memory");
13294 return result;
13297 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13298 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
13300 poly8x8_t result;
13301 poly8x16x2_t temp;
13302 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13303 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
13304 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13305 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13306 : "=w"(result)
13307 : "Q"(temp), "w"(idx)
13308 : "v16", "v17", "memory");
13309 return result;
13312 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13313 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
13315 int8x8_t result;
13316 int8x16x2_t temp;
13317 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13318 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
13319 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13320 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13321 : "=w"(result)
13322 : "Q"(temp), "w"(idx)
13323 : "v16", "v17", "memory");
13324 return result;
13327 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13328 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
13330 uint8x8_t result;
13331 uint8x16x2_t temp;
13332 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13333 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
13334 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13335 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13336 : "=w"(result)
13337 : "Q"(temp), "w"(idx)
13338 : "v16", "v17", "memory");
13339 return result;
13342 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13343 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
13345 poly8x8_t result;
13346 poly8x16x2_t temp;
13347 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13348 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
13349 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13350 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13351 : "=w"(result)
13352 : "Q"(temp), "w"(idx)
13353 : "v16", "v17", "memory");
13354 return result;
13357 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13358 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
13360 int8x8_t result = r;
13361 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
13362 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13363 : "+w"(result)
13364 : "w"(temp), "w"(idx)
13365 : /* No clobbers */);
13366 return result;
13369 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13370 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
13372 uint8x8_t result = r;
13373 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
13374 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13375 : "+w"(result)
13376 : "w"(temp), "w"(idx)
13377 : /* No clobbers */);
13378 return result;
13381 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13382 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
13384 poly8x8_t result = r;
13385 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
13386 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13387 : "+w"(result)
13388 : "w"(temp), "w"(idx)
13389 : /* No clobbers */);
13390 return result;
13393 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13394 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
13396 int8x8_t result = r;
13397 int8x16x2_t temp;
13398 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13399 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
13400 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13401 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13402 : "+w"(result)
13403 : "Q"(temp), "w"(idx)
13404 : "v16", "v17", "memory");
13405 return result;
13408 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13409 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
13411 uint8x8_t result = r;
13412 uint8x16x2_t temp;
13413 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13414 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
13415 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13416 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13417 : "+w"(result)
13418 : "Q"(temp), "w"(idx)
13419 : "v16", "v17", "memory");
13420 return result;
13423 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13424 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
13426 poly8x8_t result = r;
13427 poly8x16x2_t temp;
13428 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13429 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
13430 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13431 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13432 : "+w"(result)
13433 : "Q"(temp), "w"(idx)
13434 : "v16", "v17", "memory");
13435 return result;
13438 /* End of temporary inline asm. */
13440 /* Start of optimal implementations in approved order. */
13442 /* vabs */
13444 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13445 vabs_f32 (float32x2_t __a)
13447 return __builtin_aarch64_absv2sf (__a);
13450 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13451 vabs_f64 (float64x1_t __a)
13453 return (float64x1_t) {__builtin_fabs (__a[0])};
13456 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13457 vabs_s8 (int8x8_t __a)
13459 return __builtin_aarch64_absv8qi (__a);
13462 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13463 vabs_s16 (int16x4_t __a)
13465 return __builtin_aarch64_absv4hi (__a);
13468 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13469 vabs_s32 (int32x2_t __a)
13471 return __builtin_aarch64_absv2si (__a);
13474 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13475 vabs_s64 (int64x1_t __a)
13477 return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
13480 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13481 vabsq_f32 (float32x4_t __a)
13483 return __builtin_aarch64_absv4sf (__a);
13486 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13487 vabsq_f64 (float64x2_t __a)
13489 return __builtin_aarch64_absv2df (__a);
13492 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13493 vabsq_s8 (int8x16_t __a)
13495 return __builtin_aarch64_absv16qi (__a);
13498 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13499 vabsq_s16 (int16x8_t __a)
13501 return __builtin_aarch64_absv8hi (__a);
13504 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13505 vabsq_s32 (int32x4_t __a)
13507 return __builtin_aarch64_absv4si (__a);
13510 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13511 vabsq_s64 (int64x2_t __a)
13513 return __builtin_aarch64_absv2di (__a);
/* vadd: scalar 64-bit addition.  Plain C addition; for the unsigned
   variant wrap-around is well defined, and the signed variant matches
   the instruction's two's-complement behavior in practice.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddd_s64 (int64_t __a, int64_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __a + __b;
}
13530 /* vaddv */
13532 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13533 vaddv_s8 (int8x8_t __a)
13535 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
13538 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13539 vaddv_s16 (int16x4_t __a)
13541 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
13544 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13545 vaddv_s32 (int32x2_t __a)
13547 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
13550 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13551 vaddv_u8 (uint8x8_t __a)
13553 return vget_lane_u8 ((uint8x8_t)
13554 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
13558 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13559 vaddv_u16 (uint16x4_t __a)
13561 return vget_lane_u16 ((uint16x4_t)
13562 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
13566 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13567 vaddv_u32 (uint32x2_t __a)
13569 return vget_lane_u32 ((uint32x2_t)
13570 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
13574 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13575 vaddvq_s8 (int8x16_t __a)
13577 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
13581 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13582 vaddvq_s16 (int16x8_t __a)
13584 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
13587 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13588 vaddvq_s32 (int32x4_t __a)
13590 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
13593 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13594 vaddvq_s64 (int64x2_t __a)
13596 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
13599 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13600 vaddvq_u8 (uint8x16_t __a)
13602 return vgetq_lane_u8 ((uint8x16_t)
13603 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
13607 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13608 vaddvq_u16 (uint16x8_t __a)
13610 return vgetq_lane_u16 ((uint16x8_t)
13611 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
13615 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13616 vaddvq_u32 (uint32x4_t __a)
13618 return vgetq_lane_u32 ((uint32x4_t)
13619 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
13623 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13624 vaddvq_u64 (uint64x2_t __a)
13626 return vgetq_lane_u64 ((uint64x2_t)
13627 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
13631 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13632 vaddv_f32 (float32x2_t __a)
13634 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
13635 return vget_lane_f32 (__t, 0);
13638 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13639 vaddvq_f32 (float32x4_t __a)
13641 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
13642 return vgetq_lane_f32 (__t, 0);
13645 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13646 vaddvq_f64 (float64x2_t __a)
13648 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
13649 return vgetq_lane_f64 (__t, 0);
13652 /* vbsl */
13654 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13655 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
13657 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
13660 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13661 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
13663 return (float64x1_t)
13664 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
13667 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13668 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
13670 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
13673 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13674 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
13676 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
13679 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13680 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
13682 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
13685 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13686 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
13688 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
13691 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13692 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
13694 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
13697 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13698 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
13700 return (int64x1_t)
13701 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
13704 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13705 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
13707 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
13710 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13711 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
13713 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
13716 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13717 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
13719 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
13722 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13723 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
13725 return (uint64x1_t)
13726 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
13729 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13730 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
13732 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
13735 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13736 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
13738 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
13741 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13742 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
13744 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
13747 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13748 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
13750 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
13753 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13754 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
13756 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
13759 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13760 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
13762 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
13765 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13766 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
13768 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
13771 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13772 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
13774 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
13777 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13778 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
13780 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
13783 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13784 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
13786 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
13789 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13790 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
13792 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
13795 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13796 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
13798 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vaes: AES single-round intrinsics, available only when the crypto
   extension is enabled.  */

/* AES single-round encryption (AESE).  */
static __inline uint8x16_t
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

/* AES single-round decryption (AESD).  */
static __inline uint8x16_t
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

/* AES mix columns (AESMC).  */
static __inline uint8x16_t
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

/* AES inverse mix columns (AESIMC).  */
static __inline uint8x16_t
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}

#endif
13831 /* vcage */
13833 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13834 vcage_f64 (float64x1_t __a, float64x1_t __b)
13836 return vabs_f64 (__a) >= vabs_f64 (__b);
13839 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13840 vcages_f32 (float32_t __a, float32_t __b)
13842 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
13845 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13846 vcage_f32 (float32x2_t __a, float32x2_t __b)
13848 return vabs_f32 (__a) >= vabs_f32 (__b);
13851 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13852 vcageq_f32 (float32x4_t __a, float32x4_t __b)
13854 return vabsq_f32 (__a) >= vabsq_f32 (__b);
13857 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13858 vcaged_f64 (float64_t __a, float64_t __b)
13860 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
13863 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13864 vcageq_f64 (float64x2_t __a, float64x2_t __b)
13866 return vabsq_f64 (__a) >= vabsq_f64 (__b);
13869 /* vcagt */
13871 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13872 vcagts_f32 (float32_t __a, float32_t __b)
13874 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
13877 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13878 vcagt_f32 (float32x2_t __a, float32x2_t __b)
13880 return vabs_f32 (__a) > vabs_f32 (__b);
13883 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13884 vcagt_f64 (float64x1_t __a, float64x1_t __b)
13886 return vabs_f64 (__a) > vabs_f64 (__b);
13889 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13890 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
13892 return vabsq_f32 (__a) > vabsq_f32 (__b);
13895 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13896 vcagtd_f64 (float64_t __a, float64_t __b)
13898 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
13901 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13902 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
13904 return vabsq_f64 (__a) > vabsq_f64 (__b);
13907 /* vcale */
13909 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13910 vcale_f32 (float32x2_t __a, float32x2_t __b)
13912 return vabs_f32 (__a) <= vabs_f32 (__b);
13915 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13916 vcale_f64 (float64x1_t __a, float64x1_t __b)
13918 return vabs_f64 (__a) <= vabs_f64 (__b);
13921 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13922 vcaled_f64 (float64_t __a, float64_t __b)
13924 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
13927 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13928 vcales_f32 (float32_t __a, float32_t __b)
13930 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
13933 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13934 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
13936 return vabsq_f32 (__a) <= vabsq_f32 (__b);
13939 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13940 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
13942 return vabsq_f64 (__a) <= vabsq_f64 (__b);
13945 /* vcalt */
13947 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13948 vcalt_f32 (float32x2_t __a, float32x2_t __b)
13950 return vabs_f32 (__a) < vabs_f32 (__b);
13953 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13954 vcalt_f64 (float64x1_t __a, float64x1_t __b)
13956 return vabs_f64 (__a) < vabs_f64 (__b);
13959 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13960 vcaltd_f64 (float64_t __a, float64_t __b)
13962 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
13965 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13966 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
13968 return vabsq_f32 (__a) < vabsq_f32 (__b);
13971 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13972 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
13974 return vabsq_f64 (__a) < vabsq_f64 (__b);
13977 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13978 vcalts_f32 (float32_t __a, float32_t __b)
13980 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
13983 /* vceq - vector. */
13985 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13986 vceq_f32 (float32x2_t __a, float32x2_t __b)
13988 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
13991 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13992 vceq_f64 (float64x1_t __a, float64x1_t __b)
13994 return (uint64x1_t) (__a == __b);
13997 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13998 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
14000 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14001 (int8x8_t) __b);
14004 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14005 vceq_s8 (int8x8_t __a, int8x8_t __b)
14007 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
14010 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14011 vceq_s16 (int16x4_t __a, int16x4_t __b)
14013 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
14016 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14017 vceq_s32 (int32x2_t __a, int32x2_t __b)
14019 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
14022 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14023 vceq_s64 (int64x1_t __a, int64x1_t __b)
14025 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
14028 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14029 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
14031 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14032 (int8x8_t) __b);
14035 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14036 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
14038 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
14039 (int16x4_t) __b);
14042 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14043 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
14045 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
14046 (int32x2_t) __b);
14049 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14050 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
14052 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
14055 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14056 vceqq_f32 (float32x4_t __a, float32x4_t __b)
14058 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
14061 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14062 vceqq_f64 (float64x2_t __a, float64x2_t __b)
14064 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
14067 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14068 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
14070 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14071 (int8x16_t) __b);
14074 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14075 vceqq_s8 (int8x16_t __a, int8x16_t __b)
14077 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
14080 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14081 vceqq_s16 (int16x8_t __a, int16x8_t __b)
14083 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
14086 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14087 vceqq_s32 (int32x4_t __a, int32x4_t __b)
14089 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
14092 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14093 vceqq_s64 (int64x2_t __a, int64x2_t __b)
14095 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
14098 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14099 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
14101 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14102 (int8x16_t) __b);
14105 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14106 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
14108 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
14109 (int16x8_t) __b);
14112 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14113 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
14115 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
14116 (int32x4_t) __b);
14119 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14120 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
14122 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
14123 (int64x2_t) __b);
14126 /* vceq - scalar. */
14128 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14129 vceqs_f32 (float32_t __a, float32_t __b)
14131 return __a == __b ? -1 : 0;
/* Scalar 64-bit compare-equal: all-ones mask (-1) on equality, 0
   otherwise.  */

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_s64 (int64_t __a, int64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_u64 (uint64_t __a, uint64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}
14146 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14147 vceqd_f64 (float64_t __a, float64_t __b)
14149 return __a == __b ? -1ll : 0ll;
14152 /* vceqz - vector. */
14154 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14155 vceqz_f32 (float32x2_t __a)
14157 float32x2_t __b = {0.0f, 0.0f};
14158 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
14161 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14162 vceqz_f64 (float64x1_t __a)
14164 return (uint64x1_t) (__a == (float64x1_t) {0.0});
14167 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14168 vceqz_p8 (poly8x8_t __a)
14170 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14171 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14172 (int8x8_t) __b);
14175 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14176 vceqz_s8 (int8x8_t __a)
14178 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14179 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
14182 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14183 vceqz_s16 (int16x4_t __a)
14185 int16x4_t __b = {0, 0, 0, 0};
14186 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
14189 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14190 vceqz_s32 (int32x2_t __a)
14192 int32x2_t __b = {0, 0};
14193 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
14196 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14197 vceqz_s64 (int64x1_t __a)
14199 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
14202 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14203 vceqz_u8 (uint8x8_t __a)
14205 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14206 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14207 (int8x8_t) __b);
14210 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14211 vceqz_u16 (uint16x4_t __a)
14213 uint16x4_t __b = {0, 0, 0, 0};
14214 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
14215 (int16x4_t) __b);
14218 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14219 vceqz_u32 (uint32x2_t __a)
14221 uint32x2_t __b = {0, 0};
14222 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
14223 (int32x2_t) __b);
14226 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14227 vceqz_u64 (uint64x1_t __a)
14229 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
14232 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14233 vceqzq_f32 (float32x4_t __a)
14235 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14236 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
14239 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14240 vceqzq_f64 (float64x2_t __a)
14242 float64x2_t __b = {0.0, 0.0};
14243 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
14246 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14247 vceqzq_p8 (poly8x16_t __a)
14249 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14250 0, 0, 0, 0, 0, 0, 0, 0};
14251 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14252 (int8x16_t) __b);
14255 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14256 vceqzq_s8 (int8x16_t __a)
14258 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14259 0, 0, 0, 0, 0, 0, 0, 0};
14260 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
14263 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14264 vceqzq_s16 (int16x8_t __a)
14266 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14267 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
14270 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14271 vceqzq_s32 (int32x4_t __a)
14273 int32x4_t __b = {0, 0, 0, 0};
14274 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
14277 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14278 vceqzq_s64 (int64x2_t __a)
14280 int64x2_t __b = {0, 0};
14281 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
14284 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14285 vceqzq_u8 (uint8x16_t __a)
14287 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14288 0, 0, 0, 0, 0, 0, 0, 0};
14289 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14290 (int8x16_t) __b);
14293 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14294 vceqzq_u16 (uint16x8_t __a)
14296 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14297 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
14298 (int16x8_t) __b);
14301 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14302 vceqzq_u32 (uint32x4_t __a)
14304 uint32x4_t __b = {0, 0, 0, 0};
14305 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
14306 (int32x4_t) __b);
14309 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14310 vceqzq_u64 (uint64x2_t __a)
14312 uint64x2_t __b = {0, 0};
14313 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
14314 (int64x2_t) __b);
14317 /* vceqz - scalar. */
14319 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14320 vceqzs_f32 (float32_t __a)
14322 return __a == 0.0f ? -1 : 0;
/* Compare-against-zero (signed 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_s64 (int64_t __a)
{
  return -(uint64_t) (__a == 0);
}

/* Compare-against-zero (unsigned 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_u64 (uint64_t __a)
{
  return -(uint64_t) (__a == 0);
}
14337 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14338 vceqzd_f64 (float64_t __a)
14340 return __a == 0.0 ? -1ll : 0ll;
14343 /* vcge - vector. */
14345 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14346 vcge_f32 (float32x2_t __a, float32x2_t __b)
14348 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14351 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14352 vcge_f64 (float64x1_t __a, float64x1_t __b)
14354 return (uint64x1_t) (__a >= __b);
14357 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14358 vcge_s8 (int8x8_t __a, int8x8_t __b)
14360 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14363 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14364 vcge_s16 (int16x4_t __a, int16x4_t __b)
14366 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
14369 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14370 vcge_s32 (int32x2_t __a, int32x2_t __b)
14372 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
14375 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14376 vcge_s64 (int64x1_t __a, int64x1_t __b)
14378 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
14381 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14382 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
14384 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
14385 (int8x8_t) __b);
14388 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14389 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
14391 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
14392 (int16x4_t) __b);
14395 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14396 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
14398 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
14399 (int32x2_t) __b);
14402 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14403 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
14405 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
14408 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14409 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
14411 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14414 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14415 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
14417 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14420 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14421 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
14423 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14426 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14427 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
14429 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
14432 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14433 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
14435 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
14438 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14439 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
14441 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
14444 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14445 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
14447 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
14448 (int8x16_t) __b);
14451 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14452 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
14454 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
14455 (int16x8_t) __b);
14458 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14459 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
14461 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
14462 (int32x4_t) __b);
14465 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14466 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
14468 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
14469 (int64x2_t) __b);
14472 /* vcge - scalar. */
14474 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14475 vcges_f32 (float32_t __a, float32_t __b)
14477 return __a >= __b ? -1 : 0;
/* Greater-than-or-equal (signed 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_s64 (int64_t __a, int64_t __b)
{
  return -(uint64_t) (__a >= __b);
}

/* Greater-than-or-equal (unsigned 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_u64 (uint64_t __a, uint64_t __b)
{
  return -(uint64_t) (__a >= __b);
}
14492 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14493 vcged_f64 (float64_t __a, float64_t __b)
14495 return __a >= __b ? -1ll : 0ll;
14498 /* vcgez - vector. */
14500 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14501 vcgez_f32 (float32x2_t __a)
14503 float32x2_t __b = {0.0f, 0.0f};
14504 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14507 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14508 vcgez_f64 (float64x1_t __a)
14510 return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
14513 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14514 vcgez_s8 (int8x8_t __a)
14516 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14517 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14520 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14521 vcgez_s16 (int16x4_t __a)
14523 int16x4_t __b = {0, 0, 0, 0};
14524 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
14527 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14528 vcgez_s32 (int32x2_t __a)
14530 int32x2_t __b = {0, 0};
14531 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
14534 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14535 vcgez_s64 (int64x1_t __a)
14537 return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
14540 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14541 vcgezq_f32 (float32x4_t __a)
14543 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14544 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14547 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14548 vcgezq_f64 (float64x2_t __a)
14550 float64x2_t __b = {0.0, 0.0};
14551 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14554 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14555 vcgezq_s8 (int8x16_t __a)
14557 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14558 0, 0, 0, 0, 0, 0, 0, 0};
14559 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14562 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14563 vcgezq_s16 (int16x8_t __a)
14565 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14566 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
14569 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14570 vcgezq_s32 (int32x4_t __a)
14572 int32x4_t __b = {0, 0, 0, 0};
14573 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
14576 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14577 vcgezq_s64 (int64x2_t __a)
14579 int64x2_t __b = {0, 0};
14580 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
14583 /* vcgez - scalar. */
14585 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14586 vcgezs_f32 (float32_t __a)
14588 return __a >= 0.0f ? -1 : 0;
/* >= 0 test (signed 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgezd_s64 (int64_t __a)
{
  return -(uint64_t) (__a >= 0);
}
14597 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14598 vcgezd_f64 (float64_t __a)
14600 return __a >= 0.0 ? -1ll : 0ll;
14603 /* vcgt - vector. */
14605 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14606 vcgt_f32 (float32x2_t __a, float32x2_t __b)
14608 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
14611 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14612 vcgt_f64 (float64x1_t __a, float64x1_t __b)
14614 return (uint64x1_t) (__a > __b);
14617 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14618 vcgt_s8 (int8x8_t __a, int8x8_t __b)
14620 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
14623 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14624 vcgt_s16 (int16x4_t __a, int16x4_t __b)
14626 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
14629 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14630 vcgt_s32 (int32x2_t __a, int32x2_t __b)
14632 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
14635 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14636 vcgt_s64 (int64x1_t __a, int64x1_t __b)
14638 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14641 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14642 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
14644 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
14645 (int8x8_t) __b);
14648 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14649 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
14651 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
14652 (int16x4_t) __b);
14655 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14656 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
14658 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
14659 (int32x2_t) __b);
14662 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14663 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
14665 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14668 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14669 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
14671 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
14674 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14675 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
14677 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
14680 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14681 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
14683 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
14686 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14687 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
14689 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
14692 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14693 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
14695 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
14698 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14699 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
14701 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
14704 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14705 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
14707 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
14708 (int8x16_t) __b);
14711 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14712 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
14714 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
14715 (int16x8_t) __b);
14718 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14719 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
14721 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
14722 (int32x4_t) __b);
14725 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14726 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
14728 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
14729 (int64x2_t) __b);
14732 /* vcgt - scalar. */
14734 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14735 vcgts_f32 (float32_t __a, float32_t __b)
14737 return __a > __b ? -1 : 0;
/* Greater-than (signed 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_s64 (int64_t __a, int64_t __b)
{
  return -(uint64_t) (__a > __b);
}

/* Greater-than (unsigned 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_u64 (uint64_t __a, uint64_t __b)
{
  return -(uint64_t) (__a > __b);
}
14752 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14753 vcgtd_f64 (float64_t __a, float64_t __b)
14755 return __a > __b ? -1ll : 0ll;
14758 /* vcgtz - vector. */
14760 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14761 vcgtz_f32 (float32x2_t __a)
14763 float32x2_t __b = {0.0f, 0.0f};
14764 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
14767 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14768 vcgtz_f64 (float64x1_t __a)
14770 return (uint64x1_t) (__a > (float64x1_t) {0.0});
14773 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14774 vcgtz_s8 (int8x8_t __a)
14776 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14777 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
14780 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14781 vcgtz_s16 (int16x4_t __a)
14783 int16x4_t __b = {0, 0, 0, 0};
14784 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
14787 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14788 vcgtz_s32 (int32x2_t __a)
14790 int32x2_t __b = {0, 0};
14791 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
14794 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14795 vcgtz_s64 (int64x1_t __a)
14797 return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
14800 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14801 vcgtzq_f32 (float32x4_t __a)
14803 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14804 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
14807 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14808 vcgtzq_f64 (float64x2_t __a)
14810 float64x2_t __b = {0.0, 0.0};
14811 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
14814 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14815 vcgtzq_s8 (int8x16_t __a)
14817 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14818 0, 0, 0, 0, 0, 0, 0, 0};
14819 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
14822 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14823 vcgtzq_s16 (int16x8_t __a)
14825 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14826 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
14829 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14830 vcgtzq_s32 (int32x4_t __a)
14832 int32x4_t __b = {0, 0, 0, 0};
14833 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
14836 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14837 vcgtzq_s64 (int64x2_t __a)
14839 int64x2_t __b = {0, 0};
14840 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
14843 /* vcgtz - scalar. */
14845 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14846 vcgtzs_f32 (float32_t __a)
14848 return __a > 0.0f ? -1 : 0;
/* > 0 test (signed 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtzd_s64 (int64_t __a)
{
  return -(uint64_t) (__a > 0);
}
14857 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14858 vcgtzd_f64 (float64_t __a)
14860 return __a > 0.0 ? -1ll : 0ll;
14863 /* vcle - vector. */
14865 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14866 vcle_f32 (float32x2_t __a, float32x2_t __b)
14868 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
14871 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14872 vcle_f64 (float64x1_t __a, float64x1_t __b)
14874 return (uint64x1_t) (__a <= __b);
14877 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14878 vcle_s8 (int8x8_t __a, int8x8_t __b)
14880 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
14883 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14884 vcle_s16 (int16x4_t __a, int16x4_t __b)
14886 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
14889 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14890 vcle_s32 (int32x2_t __a, int32x2_t __b)
14892 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
14895 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14896 vcle_s64 (int64x1_t __a, int64x1_t __b)
14898 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14901 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14902 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
14904 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
14905 (int8x8_t) __a);
14908 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14909 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
14911 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
14912 (int16x4_t) __a);
14915 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14916 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
14918 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
14919 (int32x2_t) __a);
14922 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14923 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
14925 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14928 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14929 vcleq_f32 (float32x4_t __a, float32x4_t __b)
14931 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
14934 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14935 vcleq_f64 (float64x2_t __a, float64x2_t __b)
14937 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
14940 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14941 vcleq_s8 (int8x16_t __a, int8x16_t __b)
14943 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
14946 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14947 vcleq_s16 (int16x8_t __a, int16x8_t __b)
14949 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
14952 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14953 vcleq_s32 (int32x4_t __a, int32x4_t __b)
14955 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
14958 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14959 vcleq_s64 (int64x2_t __a, int64x2_t __b)
14961 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
14964 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14965 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
14967 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
14968 (int8x16_t) __a);
14971 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14972 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
14974 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
14975 (int16x8_t) __a);
14978 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14979 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
14981 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
14982 (int32x4_t) __a);
14985 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14986 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
14988 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
14989 (int64x2_t) __a);
14992 /* vcle - scalar. */
14994 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14995 vcles_f32 (float32_t __a, float32_t __b)
14997 return __a <= __b ? -1 : 0;
/* Less-than-or-equal (signed 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_s64 (int64_t __a, int64_t __b)
{
  return -(uint64_t) (__a <= __b);
}

/* Less-than-or-equal (unsigned 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_u64 (uint64_t __a, uint64_t __b)
{
  return -(uint64_t) (__a <= __b);
}
15012 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15013 vcled_f64 (float64_t __a, float64_t __b)
15015 return __a <= __b ? -1ll : 0ll;
15018 /* vclez - vector. */
15020 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15021 vclez_f32 (float32x2_t __a)
15023 float32x2_t __b = {0.0f, 0.0f};
15024 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
15027 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15028 vclez_f64 (float64x1_t __a)
15030 return (uint64x1_t) (__a <= (float64x1_t) {0.0});
15033 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15034 vclez_s8 (int8x8_t __a)
15036 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15037 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
15040 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15041 vclez_s16 (int16x4_t __a)
15043 int16x4_t __b = {0, 0, 0, 0};
15044 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
15047 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15048 vclez_s32 (int32x2_t __a)
15050 int32x2_t __b = {0, 0};
15051 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
15054 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15055 vclez_s64 (int64x1_t __a)
15057 return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
15060 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15061 vclezq_f32 (float32x4_t __a)
15063 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15064 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
15067 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15068 vclezq_f64 (float64x2_t __a)
15070 float64x2_t __b = {0.0, 0.0};
15071 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
15074 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15075 vclezq_s8 (int8x16_t __a)
15077 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15078 0, 0, 0, 0, 0, 0, 0, 0};
15079 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
15082 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15083 vclezq_s16 (int16x8_t __a)
15085 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15086 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
15089 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15090 vclezq_s32 (int32x4_t __a)
15092 int32x4_t __b = {0, 0, 0, 0};
15093 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
15096 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15097 vclezq_s64 (int64x2_t __a)
15099 int64x2_t __b = {0, 0};
15100 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
15103 /* vclez - scalar. */
15105 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15106 vclezs_f32 (float32_t __a)
15108 return __a <= 0.0f ? -1 : 0;
/* <= 0 test (signed 64-bit scalar).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_s64 (int64_t __a)
{
  return -(uint64_t) (__a <= 0);
}
15117 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15118 vclezd_f64 (float64_t __a)
15120 return __a <= 0.0 ? -1ll : 0ll;
15123 /* vclt - vector. */
15125 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15126 vclt_f32 (float32x2_t __a, float32x2_t __b)
15128 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
15131 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15132 vclt_f64 (float64x1_t __a, float64x1_t __b)
15134 return (uint64x1_t) (__a < __b);
15137 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15138 vclt_s8 (int8x8_t __a, int8x8_t __b)
15140 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
15143 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15144 vclt_s16 (int16x4_t __a, int16x4_t __b)
15146 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
15149 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15150 vclt_s32 (int32x2_t __a, int32x2_t __b)
15152 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
15155 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15156 vclt_s64 (int64x1_t __a, int64x1_t __b)
15158 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
15161 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15162 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
15164 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
15165 (int8x8_t) __a);
15168 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15169 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
15171 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
15172 (int16x4_t) __a);
15175 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15176 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
15178 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
15179 (int32x2_t) __a);
15182 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15183 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
15185 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
/* vcltq - 128-bit vector compare less-than.  a < b is computed as b > a,
   so the compare-greater-than builtins below are called with the operands
   deliberately swapped.  Each true lane is all-ones, each false lane zero,
   in the unsigned result vector.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
}

/* The unsigned-compare (cmgtu) builtins are declared with signed vector
   parameter types, hence the casts on the arguments and result; the
   comparison itself is unsigned.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
						    (int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
						   (int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
						   (int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
						   (int64x2_t) __a);
}
/* vclt - scalar.  Returns all-ones (-1) when a < b, 0 otherwise, mirroring
   the per-lane result convention of the vector comparisons.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vclts_f32 (float32_t __a, float32_t __b)
{
  return __a < __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_s64 (int64_t __a, int64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_u64 (uint64_t __a, uint64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_f64 (float64_t __a, float64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}
/* vcltz - vector compare less-than-zero.  Implemented with the cmlt
   (compare less-than) builtins against an explicit zero vector; true lanes
   become all-ones.  The 64x1 variants have no builtin and use plain C
   comparison / initializer syntax instead.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_f64 (float64x1_t __a)
{
  /* GNU C vector comparison already produces the all-ones/all-zeros mask.  */
  return (uint64x1_t) (__a < (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcltz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_s64 (int64x1_t __a)
{
  return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
}
/* vcltz - scalar.  All-ones result when the operand is negative, else 0.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcltzs_f32 (float32_t __a)
{
  return __a < 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_s64 (int64_t __a)
{
  return __a < 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_f64 (float64_t __a)
{
  return __a < 0.0 ? -1ll : 0ll;
}
/* vclz - per-element count leading zeros.  The unsigned variants reuse the
   signed-typed clz builtins via casts: the operation depends only on the
   bit pattern, which the casts leave unchanged.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clzv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clzv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clzv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t __a)
{
  return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t __a)
{
  return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t __a)
{
  return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clzv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clzv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clzv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t __a)
{
  return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t __a)
{
  return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t __a)
{
  return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
/* vcvt (double -> float).  The "lo" builtin narrows a v2df into the low
   half of the result; the "hi" builtin narrows __b into the high half,
   keeping __a as the low half.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t __a)
{
  return __builtin_aarch64_float_truncate_lo_v2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
}

/* vcvt (float -> double).  The "lo" builtin widens the low half of a v2sf;
   vcvt_high widens the high half of a v4sf.  */

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t __a)
{

  return __builtin_aarch64_float_extend_lo_v2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a)
{
  return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
}
/* vcvt (<u>int -> float).  Scalar forms are plain C conversions; vector
   forms use the float/floatuns builtins.  The casts on the unsigned vector
   arguments exist only because the builtins are declared with signed
   parameter types.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_s64 (int64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_u64 (uint64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_s32 (int32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_u32 (uint32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t __a)
{
  return __builtin_aarch64_floatv2siv2sf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t __a)
{
  return __builtin_aarch64_floatv4siv4sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t __a)
{
  return __builtin_aarch64_floatv2div2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
}
/* vcvt (float -> <u>int).  Scalar forms rely on C's float-to-integer
   conversion (truncation toward zero); vector forms use the lbtrunc
   (truncating) builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtd_s64_f64 (float64_t __a)
{
  return (int64_t) __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtd_u64_f64 (float64_t __a)
{
  return (uint64_t) __a;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_s32_f32 (float32_t __a)
{
  return (int32_t) __a;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_u32_f32 (float32_t __a)
{
  return (uint32_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
}
/* vcvta - float -> integer conversion via the lround-family builtins
   (round to nearest; NOTE(review): the "a" suffix means ties-away-from-zero
   per the AArch64 FCVTA* instructions — confirm against the md patterns).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lrounddfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lroundudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtas_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtas_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lroundv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lroundv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lroundv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
}
/* vcvtm - float -> integer conversion rounding toward minus infinity
   (floor).  Signed scalar forms go through the generic GCC floor builtins;
   unsigned and vector forms use the AArch64 lflooru/lfloor builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64 (float64_t __a)
{
  return __builtin_llfloor (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfloorudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtms_s32_f32 (float32_t __a)
{
  return __builtin_ifloorf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtms_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfloorusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
}
/* vcvtn - float -> integer conversion with round-to-nearest via the
   lfrintn builtins (FRINTN-style rounding; NOTE(review): ties-to-even per
   the AArch64 "n" suffix — confirm against the md patterns).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
}
/* vcvtp - float -> integer conversion rounding toward plus infinity
   (ceiling).  Signed scalar forms use the generic GCC ceil builtins;
   unsigned and vector forms use the AArch64 lceilu/lceil builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64 (float64_t __a)
{
  return __builtin_llceil (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lceiludfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtps_s32_f32 (float32_t __a)
{
  return __builtin_iceilf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtps_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lceilusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceilv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceilv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceilv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
}
/* vdup_n - broadcast a scalar into every lane of a 64-bit vector, built
   with plain vector-initializer syntax so GCC can select the best DUP/MOV
   sequence itself.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
  return (float32x2_t) {__a, __a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_n_f64 (float64_t __a)
{
  return (float64x1_t) {__a};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (poly8_t __a)
{
  return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (poly16_t __a)
{
  return (poly16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int8_t __a)
{
  return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int16_t __a)
{
  return (int16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t __a)
{
  return (int32x2_t) {__a, __a};
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint8_t __a)
{
  return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint16_t __a)
{
  return (uint16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t __a)
{
  return (uint32x2_t) {__a, __a};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}
/* vdupq_n - broadcast a scalar into every lane of a 128-bit vector.  Note
   the narrow-element variants (p8/p16/s8/s16/u8/u16) take a 32-bit scalar
   parameter; each lane receives the value via the initializer's implicit
   narrowing conversion.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}
/* vdup_lane - broadcast lane __b of a 64-bit vector into every lane of a
   64-bit result.  Delegates to the __aarch64_vdup_lane_* helper macros
   (defined earlier in this header); __b must be a constant lane index.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_u64 (__a, __b);
}
/* vdup_laneq - broadcast lane __b of a 128-bit vector into every lane of a
   64-bit result, via the __aarch64_vdup_laneq_* helper macros; __b must be
   a constant lane index.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u64 (__a, __b);
}
/* vdupq_lane - broadcast lane __b of a 64-bit vector into every lane of a
   128-bit result, via the __aarch64_vdupq_lane_* helper macros; __b must
   be a constant lane index.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u64 (__a, __b);
}
/* vdupq_laneq - broadcast lane __b of a 128-bit vector into every lane of
   a 128-bit result, via the __aarch64_vdupq_laneq_* helper macros; __b
   must be a constant lane index.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u64 (__a, __b);
}
/* vdupb_lane - extract the byte in lane __b as a scalar; a scalar "dup" is
   just a lane read, so these forward to the __aarch64_vget_lane_* helpers.
   __b must be a constant lane index.  */
__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_p8 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vdupb_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_s8 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vdupb_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_u8 (__a, __b);
}

/* vduph_lane - halfword (16-bit) lane extraction to scalar.  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_p16 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_s16 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_u16 (__a, __b);
}

/* vdups_lane - word (32-bit) lane extraction to scalar.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_f32 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_s32 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_u32 (__a, __b);
}
16390 /* vdupd_lane */
16391 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16392 vdupd_lane_f64 (float64x1_t __a, const int __b)
16394 __builtin_aarch64_im_lane_boundsi (__b, 1);
16395 return __a[0];
16398 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16399 vdupd_lane_s64 (int64x1_t __a, const int __b)
16401 __builtin_aarch64_im_lane_boundsi (__b, 1);
16402 return __a[0];
16405 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16406 vdupd_lane_u64 (uint64x1_t __a, const int __b)
16408 __builtin_aarch64_im_lane_boundsi (__b, 1);
16409 return __a[0];
16412 /* vdupb_laneq */
16413 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
16414 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
16416 return __aarch64_vgetq_lane_p8 (__a, __b);
16419 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
16420 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
16422 return __aarch64_vgetq_lane_s8 (__a, __b);
16425 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16426 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
16428 return __aarch64_vgetq_lane_u8 (__a, __b);
16431 /* vduph_laneq */
16432 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
16433 vduph_laneq_p16 (poly16x8_t __a, const int __b)
16435 return __aarch64_vgetq_lane_p16 (__a, __b);
16438 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
16439 vduph_laneq_s16 (int16x8_t __a, const int __b)
16441 return __aarch64_vgetq_lane_s16 (__a, __b);
16444 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16445 vduph_laneq_u16 (uint16x8_t __a, const int __b)
16447 return __aarch64_vgetq_lane_u16 (__a, __b);
16450 /* vdups_laneq */
16451 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16452 vdups_laneq_f32 (float32x4_t __a, const int __b)
16454 return __aarch64_vgetq_lane_f32 (__a, __b);
16457 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
16458 vdups_laneq_s32 (int32x4_t __a, const int __b)
16460 return __aarch64_vgetq_lane_s32 (__a, __b);
16463 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16464 vdups_laneq_u32 (uint32x4_t __a, const int __b)
16466 return __aarch64_vgetq_lane_u32 (__a, __b);
16469 /* vdupd_laneq */
16470 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16471 vdupd_laneq_f64 (float64x2_t __a, const int __b)
16473 return __aarch64_vgetq_lane_f64 (__a, __b);
16476 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16477 vdupd_laneq_s64 (int64x2_t __a, const int __b)
16479 return __aarch64_vgetq_lane_s64 (__a, __b);
16482 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16483 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
16485 return __aarch64_vgetq_lane_u64 (__a, __b);
16488 /* vext */
16490 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16491 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
16493 __builtin_aarch64_im_lane_boundsi (__c, 2);
16494 #ifdef __AARCH64EB__
16495 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16496 #else
16497 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16498 #endif
16501 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16502 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
16504 /* The only possible index to the assembler instruction returns element 0. */
16505 __builtin_aarch64_im_lane_boundsi (__c, 1);
16506 return __a;
16508 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16509 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
16511 __builtin_aarch64_im_lane_boundsi (__c, 8);
16512 #ifdef __AARCH64EB__
16513 return __builtin_shuffle (__b, __a, (uint8x8_t)
16514 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16515 #else
16516 return __builtin_shuffle (__a, __b,
16517 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16518 #endif
16521 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16522 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
16524 __builtin_aarch64_im_lane_boundsi (__c, 4);
16525 #ifdef __AARCH64EB__
16526 return __builtin_shuffle (__b, __a,
16527 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16528 #else
16529 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16530 #endif
16533 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16534 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
16536 __builtin_aarch64_im_lane_boundsi (__c, 8);
16537 #ifdef __AARCH64EB__
16538 return __builtin_shuffle (__b, __a, (uint8x8_t)
16539 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16540 #else
16541 return __builtin_shuffle (__a, __b,
16542 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16543 #endif
16546 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16547 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
16549 __builtin_aarch64_im_lane_boundsi (__c, 4);
16550 #ifdef __AARCH64EB__
16551 return __builtin_shuffle (__b, __a,
16552 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16553 #else
16554 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16555 #endif
16558 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16559 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
16561 __builtin_aarch64_im_lane_boundsi (__c, 2);
16562 #ifdef __AARCH64EB__
16563 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16564 #else
16565 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16566 #endif
16569 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16570 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
16572 /* The only possible index to the assembler instruction returns element 0. */
16573 __builtin_aarch64_im_lane_boundsi (__c, 1);
16574 return __a;
16577 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16578 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
16580 __builtin_aarch64_im_lane_boundsi (__c, 8);
16581 #ifdef __AARCH64EB__
16582 return __builtin_shuffle (__b, __a, (uint8x8_t)
16583 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16584 #else
16585 return __builtin_shuffle (__a, __b,
16586 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16587 #endif
16590 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16591 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
16593 __builtin_aarch64_im_lane_boundsi (__c, 4);
16594 #ifdef __AARCH64EB__
16595 return __builtin_shuffle (__b, __a,
16596 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16597 #else
16598 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16599 #endif
16602 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16603 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
16605 __builtin_aarch64_im_lane_boundsi (__c, 2);
16606 #ifdef __AARCH64EB__
16607 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16608 #else
16609 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16610 #endif
16613 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16614 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
16616 /* The only possible index to the assembler instruction returns element 0. */
16617 __builtin_aarch64_im_lane_boundsi (__c, 1);
16618 return __a;
16621 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16622 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
16624 __builtin_aarch64_im_lane_boundsi (__c, 4);
16625 #ifdef __AARCH64EB__
16626 return __builtin_shuffle (__b, __a,
16627 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16628 #else
16629 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16630 #endif
16633 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16634 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
16636 __builtin_aarch64_im_lane_boundsi (__c, 2);
16637 #ifdef __AARCH64EB__
16638 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16639 #else
16640 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16641 #endif
16644 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16645 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
16647 __builtin_aarch64_im_lane_boundsi (__c, 16);
16648 #ifdef __AARCH64EB__
16649 return __builtin_shuffle (__b, __a, (uint8x16_t)
16650 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16651 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16652 #else
16653 return __builtin_shuffle (__a, __b, (uint8x16_t)
16654 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16655 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16656 #endif
16659 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16660 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
16662 __builtin_aarch64_im_lane_boundsi (__c, 8);
16663 #ifdef __AARCH64EB__
16664 return __builtin_shuffle (__b, __a, (uint16x8_t)
16665 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16666 #else
16667 return __builtin_shuffle (__a, __b,
16668 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16669 #endif
16672 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16673 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
16675 __builtin_aarch64_im_lane_boundsi (__c, 16);
16676 #ifdef __AARCH64EB__
16677 return __builtin_shuffle (__b, __a, (uint8x16_t)
16678 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16679 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16680 #else
16681 return __builtin_shuffle (__a, __b, (uint8x16_t)
16682 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16683 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16684 #endif
16687 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16688 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
16690 __builtin_aarch64_im_lane_boundsi (__c, 8);
16691 #ifdef __AARCH64EB__
16692 return __builtin_shuffle (__b, __a, (uint16x8_t)
16693 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16694 #else
16695 return __builtin_shuffle (__a, __b,
16696 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16697 #endif
16700 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16701 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
16703 __builtin_aarch64_im_lane_boundsi (__c, 4);
16704 #ifdef __AARCH64EB__
16705 return __builtin_shuffle (__b, __a,
16706 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16707 #else
16708 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16709 #endif
16712 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16713 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
16715 __builtin_aarch64_im_lane_boundsi (__c, 2);
16716 #ifdef __AARCH64EB__
16717 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16718 #else
16719 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16720 #endif
16723 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16724 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
16726 __builtin_aarch64_im_lane_boundsi (__c, 16);
16727 #ifdef __AARCH64EB__
16728 return __builtin_shuffle (__b, __a, (uint8x16_t)
16729 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16730 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16731 #else
16732 return __builtin_shuffle (__a, __b, (uint8x16_t)
16733 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16734 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16735 #endif
16738 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16739 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
16741 __builtin_aarch64_im_lane_boundsi (__c, 8);
16742 #ifdef __AARCH64EB__
16743 return __builtin_shuffle (__b, __a, (uint16x8_t)
16744 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16745 #else
16746 return __builtin_shuffle (__a, __b,
16747 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16748 #endif
16751 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16752 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
16754 __builtin_aarch64_im_lane_boundsi (__c, 4);
16755 #ifdef __AARCH64EB__
16756 return __builtin_shuffle (__b, __a,
16757 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16758 #else
16759 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16760 #endif
16763 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16764 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
16766 __builtin_aarch64_im_lane_boundsi (__c, 2);
16767 #ifdef __AARCH64EB__
16768 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16769 #else
16770 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16771 #endif
16774 /* vfma */
16776 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16777 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16779 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16782 /* vfma_lane */
16784 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16785 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
16786 float32x2_t __c, const int __lane)
16788 return __builtin_aarch64_fmav2sf (__b,
16789 __aarch64_vdup_lane_f32 (__c, __lane),
16790 __a);
16793 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16794 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
16795 float64x1_t __c, const int __lane)
16797 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16800 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16801 vfmad_lane_f64 (float64_t __a, float64_t __b,
16802 float64x1_t __c, const int __lane)
16804 return __builtin_fma (__b, __c[0], __a);
16807 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16808 vfmas_lane_f32 (float32_t __a, float32_t __b,
16809 float32x2_t __c, const int __lane)
16811 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16814 /* vfma_laneq */
16816 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16817 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
16818 float32x4_t __c, const int __lane)
16820 return __builtin_aarch64_fmav2sf (__b,
16821 __aarch64_vdup_laneq_f32 (__c, __lane),
16822 __a);
16825 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16826 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
16827 float64x2_t __c, const int __lane)
16829 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16830 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
16833 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16834 vfmad_laneq_f64 (float64_t __a, float64_t __b,
16835 float64x2_t __c, const int __lane)
16837 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16840 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16841 vfmas_laneq_f32 (float32_t __a, float32_t __b,
16842 float32x4_t __c, const int __lane)
16844 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16847 /* vfmaq_lane */
16849 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16850 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
16851 float32x2_t __c, const int __lane)
16853 return __builtin_aarch64_fmav4sf (__b,
16854 __aarch64_vdupq_lane_f32 (__c, __lane),
16855 __a);
16858 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16859 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
16860 float64x1_t __c, const int __lane)
16862 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
16865 /* vfmaq_laneq */
16867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16868 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16869 float32x4_t __c, const int __lane)
16871 return __builtin_aarch64_fmav4sf (__b,
16872 __aarch64_vdupq_laneq_f32 (__c, __lane),
16873 __a);
16876 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16877 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16878 float64x2_t __c, const int __lane)
16880 return __builtin_aarch64_fmav2df (__b,
16881 __aarch64_vdupq_laneq_f64 (__c, __lane),
16882 __a);
16885 /* vfms */
16887 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16888 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16890 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16893 /* vfms_lane */
16895 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16896 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
16897 float32x2_t __c, const int __lane)
16899 return __builtin_aarch64_fmav2sf (-__b,
16900 __aarch64_vdup_lane_f32 (__c, __lane),
16901 __a);
16904 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16905 vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
16906 float64x1_t __c, const int __lane)
16908 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16911 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16912 vfmsd_lane_f64 (float64_t __a, float64_t __b,
16913 float64x1_t __c, const int __lane)
16915 return __builtin_fma (-__b, __c[0], __a);
16918 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16919 vfmss_lane_f32 (float32_t __a, float32_t __b,
16920 float32x2_t __c, const int __lane)
16922 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16925 /* vfms_laneq */
16927 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16928 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
16929 float32x4_t __c, const int __lane)
16931 return __builtin_aarch64_fmav2sf (-__b,
16932 __aarch64_vdup_laneq_f32 (__c, __lane),
16933 __a);
16936 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16937 vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
16938 float64x2_t __c, const int __lane)
16940 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16941 return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
16944 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16945 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
16946 float64x2_t __c, const int __lane)
16948 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16951 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16952 vfmss_laneq_f32 (float32_t __a, float32_t __b,
16953 float32x4_t __c, const int __lane)
16955 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16958 /* vfmsq_lane */
16960 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16961 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
16962 float32x2_t __c, const int __lane)
16964 return __builtin_aarch64_fmav4sf (-__b,
16965 __aarch64_vdupq_lane_f32 (__c, __lane),
16966 __a);
16969 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16970 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
16971 float64x1_t __c, const int __lane)
16973 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
16976 /* vfmsq_laneq */
16978 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16979 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16980 float32x4_t __c, const int __lane)
16982 return __builtin_aarch64_fmav4sf (-__b,
16983 __aarch64_vdupq_laneq_f32 (__c, __lane),
16984 __a);
16987 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16988 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16989 float64x2_t __c, const int __lane)
16991 return __builtin_aarch64_fmav2df (-__b,
16992 __aarch64_vdupq_laneq_f64 (__c, __lane),
16993 __a);
16996 /* vld1 */
16998 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16999 vld1_f32 (const float32_t *a)
17001 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
17004 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17005 vld1_f64 (const float64_t *a)
17007 return (float64x1_t) {*a};
17010 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17011 vld1_p8 (const poly8_t *a)
17013 return (poly8x8_t)
17014 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
17017 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17018 vld1_p16 (const poly16_t *a)
17020 return (poly16x4_t)
17021 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
17024 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17025 vld1_s8 (const int8_t *a)
17027 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
17030 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17031 vld1_s16 (const int16_t *a)
17033 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
17036 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17037 vld1_s32 (const int32_t *a)
17039 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
17042 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17043 vld1_s64 (const int64_t *a)
17045 return (int64x1_t) {*a};
17048 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17049 vld1_u8 (const uint8_t *a)
17051 return (uint8x8_t)
17052 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
17055 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17056 vld1_u16 (const uint16_t *a)
17058 return (uint16x4_t)
17059 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
17062 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17063 vld1_u32 (const uint32_t *a)
17065 return (uint32x2_t)
17066 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
17069 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17070 vld1_u64 (const uint64_t *a)
17072 return (uint64x1_t) {*a};
17075 /* vld1q */
17077 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17078 vld1q_f32 (const float32_t *a)
17080 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
17083 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17084 vld1q_f64 (const float64_t *a)
17086 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
17089 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17090 vld1q_p8 (const poly8_t *a)
17092 return (poly8x16_t)
17093 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
17096 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17097 vld1q_p16 (const poly16_t *a)
17099 return (poly16x8_t)
17100 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
17103 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17104 vld1q_s8 (const int8_t *a)
17106 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
17109 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17110 vld1q_s16 (const int16_t *a)
17112 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
17115 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17116 vld1q_s32 (const int32_t *a)
17118 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
17121 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17122 vld1q_s64 (const int64_t *a)
17124 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
17127 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17128 vld1q_u8 (const uint8_t *a)
17130 return (uint8x16_t)
17131 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
17134 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17135 vld1q_u16 (const uint16_t *a)
17137 return (uint16x8_t)
17138 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
17141 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17142 vld1q_u32 (const uint32_t *a)
17144 return (uint32x4_t)
17145 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
17148 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17149 vld1q_u64 (const uint64_t *a)
17151 return (uint64x2_t)
17152 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
17155 /* vldn */
17157 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
17158 vld2_s64 (const int64_t * __a)
17160 int64x1x2_t ret;
17161 __builtin_aarch64_simd_oi __o;
17162 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
17163 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17164 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17165 return ret;
17168 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
17169 vld2_u64 (const uint64_t * __a)
17171 uint64x1x2_t ret;
17172 __builtin_aarch64_simd_oi __o;
17173 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
17174 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17175 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17176 return ret;
17179 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
17180 vld2_f64 (const float64_t * __a)
17182 float64x1x2_t ret;
17183 __builtin_aarch64_simd_oi __o;
17184 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
17185 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
17186 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
17187 return ret;
17190 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
17191 vld2_s8 (const int8_t * __a)
17193 int8x8x2_t ret;
17194 __builtin_aarch64_simd_oi __o;
17195 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17196 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17197 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17198 return ret;
17201 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
17202 vld2_p8 (const poly8_t * __a)
17204 poly8x8x2_t ret;
17205 __builtin_aarch64_simd_oi __o;
17206 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17207 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17208 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17209 return ret;
17212 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
17213 vld2_s16 (const int16_t * __a)
17215 int16x4x2_t ret;
17216 __builtin_aarch64_simd_oi __o;
17217 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17218 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17219 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17220 return ret;
17223 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
17224 vld2_p16 (const poly16_t * __a)
17226 poly16x4x2_t ret;
17227 __builtin_aarch64_simd_oi __o;
17228 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17229 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17230 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17231 return ret;
17234 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
17235 vld2_s32 (const int32_t * __a)
17237 int32x2x2_t ret;
17238 __builtin_aarch64_simd_oi __o;
17239 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
17240 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17241 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17242 return ret;
17245 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
17246 vld2_u8 (const uint8_t * __a)
17248 uint8x8x2_t ret;
17249 __builtin_aarch64_simd_oi __o;
17250 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17251 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17252 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17253 return ret;
17256 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
17257 vld2_u16 (const uint16_t * __a)
17259 uint16x4x2_t ret;
17260 __builtin_aarch64_simd_oi __o;
17261 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17262 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17263 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17264 return ret;
17267 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
17268 vld2_u32 (const uint32_t * __a)
17270 uint32x2x2_t ret;
17271 __builtin_aarch64_simd_oi __o;
17272 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
17273 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17274 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17275 return ret;
17278 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
17279 vld2_f32 (const float32_t * __a)
17281 float32x2x2_t ret;
17282 __builtin_aarch64_simd_oi __o;
17283 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
17284 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
17285 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
17286 return ret;
17289 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
17290 vld2q_s8 (const int8_t * __a)
17292 int8x16x2_t ret;
17293 __builtin_aarch64_simd_oi __o;
17294 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17295 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17296 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17297 return ret;
17300 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
17301 vld2q_p8 (const poly8_t * __a)
17303 poly8x16x2_t ret;
17304 __builtin_aarch64_simd_oi __o;
17305 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17306 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17307 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17308 return ret;
17311 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
17312 vld2q_s16 (const int16_t * __a)
17314 int16x8x2_t ret;
17315 __builtin_aarch64_simd_oi __o;
17316 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17317 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17318 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17319 return ret;
17322 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
17323 vld2q_p16 (const poly16_t * __a)
17325 poly16x8x2_t ret;
17326 __builtin_aarch64_simd_oi __o;
17327 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17328 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17329 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17330 return ret;
17333 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
17334 vld2q_s32 (const int32_t * __a)
17336 int32x4x2_t ret;
17337 __builtin_aarch64_simd_oi __o;
17338 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
17339 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17340 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17341 return ret;
17344 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
17345 vld2q_s64 (const int64_t * __a)
17347 int64x2x2_t ret;
17348 __builtin_aarch64_simd_oi __o;
17349 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
17350 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17351 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17352 return ret;
17355 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
17356 vld2q_u8 (const uint8_t * __a)
17358 uint8x16x2_t ret;
17359 __builtin_aarch64_simd_oi __o;
17360 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17361 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17362 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17363 return ret;
17366 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
17367 vld2q_u16 (const uint16_t * __a)
17369 uint16x8x2_t ret;
17370 __builtin_aarch64_simd_oi __o;
17371 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17372 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17373 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17374 return ret;
17377 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
17378 vld2q_u32 (const uint32_t * __a)
17380 uint32x4x2_t ret;
17381 __builtin_aarch64_simd_oi __o;
17382 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
17383 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17384 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17385 return ret;
17388 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
17389 vld2q_u64 (const uint64_t * __a)
17391 uint64x2x2_t ret;
17392 __builtin_aarch64_simd_oi __o;
17393 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
17394 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17395 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17396 return ret;
17399 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
17400 vld2q_f32 (const float32_t * __a)
17402 float32x4x2_t ret;
17403 __builtin_aarch64_simd_oi __o;
17404 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
17405 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
17406 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
17407 return ret;
17410 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
17411 vld2q_f64 (const float64_t * __a)
17413 float64x2x2_t ret;
17414 __builtin_aarch64_simd_oi __o;
17415 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
17416 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
17417 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
17418 return ret;
17421 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
17422 vld3_s64 (const int64_t * __a)
17424 int64x1x3_t ret;
17425 __builtin_aarch64_simd_ci __o;
17426 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
17427 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17428 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17429 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17430 return ret;
17433 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
17434 vld3_u64 (const uint64_t * __a)
17436 uint64x1x3_t ret;
17437 __builtin_aarch64_simd_ci __o;
17438 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
17439 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17440 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17441 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17442 return ret;
17445 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
17446 vld3_f64 (const float64_t * __a)
17448 float64x1x3_t ret;
17449 __builtin_aarch64_simd_ci __o;
17450 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
17451 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
17452 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
17453 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
17454 return ret;
17457 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
17458 vld3_s8 (const int8_t * __a)
17460 int8x8x3_t ret;
17461 __builtin_aarch64_simd_ci __o;
17462 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17463 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17464 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17465 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17466 return ret;
17469 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
17470 vld3_p8 (const poly8_t * __a)
17472 poly8x8x3_t ret;
17473 __builtin_aarch64_simd_ci __o;
17474 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17475 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17476 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17477 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17478 return ret;
17481 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
17482 vld3_s16 (const int16_t * __a)
17484 int16x4x3_t ret;
17485 __builtin_aarch64_simd_ci __o;
17486 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17487 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17488 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17489 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17490 return ret;
17493 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
17494 vld3_p16 (const poly16_t * __a)
17496 poly16x4x3_t ret;
17497 __builtin_aarch64_simd_ci __o;
17498 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17499 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17500 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17501 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17502 return ret;
17505 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
17506 vld3_s32 (const int32_t * __a)
17508 int32x2x3_t ret;
17509 __builtin_aarch64_simd_ci __o;
17510 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
17511 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17512 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17513 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17514 return ret;
17517 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
17518 vld3_u8 (const uint8_t * __a)
17520 uint8x8x3_t ret;
17521 __builtin_aarch64_simd_ci __o;
17522 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17523 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17524 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17525 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17526 return ret;
17529 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
17530 vld3_u16 (const uint16_t * __a)
17532 uint16x4x3_t ret;
17533 __builtin_aarch64_simd_ci __o;
17534 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17535 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17536 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17537 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17538 return ret;
17541 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
17542 vld3_u32 (const uint32_t * __a)
17544 uint32x2x3_t ret;
17545 __builtin_aarch64_simd_ci __o;
17546 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
17547 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17548 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17549 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17550 return ret;
17553 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
17554 vld3_f32 (const float32_t * __a)
17556 float32x2x3_t ret;
17557 __builtin_aarch64_simd_ci __o;
17558 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
17559 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
17560 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
17561 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
17562 return ret;
17565 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
17566 vld3q_s8 (const int8_t * __a)
17568 int8x16x3_t ret;
17569 __builtin_aarch64_simd_ci __o;
17570 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17571 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17572 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17573 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17574 return ret;
17577 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
17578 vld3q_p8 (const poly8_t * __a)
17580 poly8x16x3_t ret;
17581 __builtin_aarch64_simd_ci __o;
17582 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17583 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17584 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17585 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17586 return ret;
17589 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
17590 vld3q_s16 (const int16_t * __a)
17592 int16x8x3_t ret;
17593 __builtin_aarch64_simd_ci __o;
17594 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17595 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17596 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17597 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17598 return ret;
17601 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
17602 vld3q_p16 (const poly16_t * __a)
17604 poly16x8x3_t ret;
17605 __builtin_aarch64_simd_ci __o;
17606 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17607 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17608 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17609 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17610 return ret;
17613 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
17614 vld3q_s32 (const int32_t * __a)
17616 int32x4x3_t ret;
17617 __builtin_aarch64_simd_ci __o;
17618 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
17619 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17620 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17621 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17622 return ret;
17625 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
17626 vld3q_s64 (const int64_t * __a)
17628 int64x2x3_t ret;
17629 __builtin_aarch64_simd_ci __o;
17630 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
17631 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17632 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17633 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17634 return ret;
17637 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
17638 vld3q_u8 (const uint8_t * __a)
17640 uint8x16x3_t ret;
17641 __builtin_aarch64_simd_ci __o;
17642 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17643 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17644 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17645 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17646 return ret;
17649 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
17650 vld3q_u16 (const uint16_t * __a)
17652 uint16x8x3_t ret;
17653 __builtin_aarch64_simd_ci __o;
17654 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17655 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17656 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17657 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17658 return ret;
17661 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
17662 vld3q_u32 (const uint32_t * __a)
17664 uint32x4x3_t ret;
17665 __builtin_aarch64_simd_ci __o;
17666 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
17667 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17668 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17669 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17670 return ret;
17673 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
17674 vld3q_u64 (const uint64_t * __a)
17676 uint64x2x3_t ret;
17677 __builtin_aarch64_simd_ci __o;
17678 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
17679 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17680 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17681 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17682 return ret;
17685 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
17686 vld3q_f32 (const float32_t * __a)
17688 float32x4x3_t ret;
17689 __builtin_aarch64_simd_ci __o;
17690 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
17691 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
17692 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
17693 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
17694 return ret;
17697 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
17698 vld3q_f64 (const float64_t * __a)
17700 float64x2x3_t ret;
17701 __builtin_aarch64_simd_ci __o;
17702 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
17703 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
17704 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
17705 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
17706 return ret;
17709 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
17710 vld4_s64 (const int64_t * __a)
17712 int64x1x4_t ret;
17713 __builtin_aarch64_simd_xi __o;
17714 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17715 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17716 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17717 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17718 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17719 return ret;
17722 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
17723 vld4_u64 (const uint64_t * __a)
17725 uint64x1x4_t ret;
17726 __builtin_aarch64_simd_xi __o;
17727 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17728 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17729 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17730 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17731 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17732 return ret;
17735 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
17736 vld4_f64 (const float64_t * __a)
17738 float64x1x4_t ret;
17739 __builtin_aarch64_simd_xi __o;
17740 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
17741 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
17742 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
17743 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
17744 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
17745 return ret;
17748 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
17749 vld4_s8 (const int8_t * __a)
17751 int8x8x4_t ret;
17752 __builtin_aarch64_simd_xi __o;
17753 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17754 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17755 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17756 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17757 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17758 return ret;
17761 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
17762 vld4_p8 (const poly8_t * __a)
17764 poly8x8x4_t ret;
17765 __builtin_aarch64_simd_xi __o;
17766 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17767 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17768 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17769 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17770 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17771 return ret;
17774 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
17775 vld4_s16 (const int16_t * __a)
17777 int16x4x4_t ret;
17778 __builtin_aarch64_simd_xi __o;
17779 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17780 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17781 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17782 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17783 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17784 return ret;
17787 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
17788 vld4_p16 (const poly16_t * __a)
17790 poly16x4x4_t ret;
17791 __builtin_aarch64_simd_xi __o;
17792 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17793 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17794 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17795 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17796 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17797 return ret;
17800 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
17801 vld4_s32 (const int32_t * __a)
17803 int32x2x4_t ret;
17804 __builtin_aarch64_simd_xi __o;
17805 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17806 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17807 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17808 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17809 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17810 return ret;
17813 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
17814 vld4_u8 (const uint8_t * __a)
17816 uint8x8x4_t ret;
17817 __builtin_aarch64_simd_xi __o;
17818 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17819 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17820 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17821 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17822 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17823 return ret;
17826 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
17827 vld4_u16 (const uint16_t * __a)
17829 uint16x4x4_t ret;
17830 __builtin_aarch64_simd_xi __o;
17831 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17832 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17833 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17834 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17835 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17836 return ret;
17839 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
17840 vld4_u32 (const uint32_t * __a)
17842 uint32x2x4_t ret;
17843 __builtin_aarch64_simd_xi __o;
17844 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17845 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17846 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17847 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17848 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17849 return ret;
17852 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
17853 vld4_f32 (const float32_t * __a)
17855 float32x2x4_t ret;
17856 __builtin_aarch64_simd_xi __o;
17857 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
17858 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
17859 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
17860 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
17861 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
17862 return ret;
17865 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
17866 vld4q_s8 (const int8_t * __a)
17868 int8x16x4_t ret;
17869 __builtin_aarch64_simd_xi __o;
17870 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17871 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17872 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17873 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17874 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17875 return ret;
17878 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
17879 vld4q_p8 (const poly8_t * __a)
17881 poly8x16x4_t ret;
17882 __builtin_aarch64_simd_xi __o;
17883 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17884 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17885 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17886 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17887 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17888 return ret;
17891 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
17892 vld4q_s16 (const int16_t * __a)
17894 int16x8x4_t ret;
17895 __builtin_aarch64_simd_xi __o;
17896 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17897 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17898 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17899 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17900 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17901 return ret;
17904 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
17905 vld4q_p16 (const poly16_t * __a)
17907 poly16x8x4_t ret;
17908 __builtin_aarch64_simd_xi __o;
17909 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17910 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17911 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17912 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17913 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17914 return ret;
17917 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
17918 vld4q_s32 (const int32_t * __a)
17920 int32x4x4_t ret;
17921 __builtin_aarch64_simd_xi __o;
17922 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17923 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17924 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17925 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17926 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17927 return ret;
17930 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
17931 vld4q_s64 (const int64_t * __a)
17933 int64x2x4_t ret;
17934 __builtin_aarch64_simd_xi __o;
17935 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17936 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17937 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17938 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17939 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17940 return ret;
17943 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
17944 vld4q_u8 (const uint8_t * __a)
17946 uint8x16x4_t ret;
17947 __builtin_aarch64_simd_xi __o;
17948 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17949 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17950 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17951 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17952 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17953 return ret;
17956 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
17957 vld4q_u16 (const uint16_t * __a)
17959 uint16x8x4_t ret;
17960 __builtin_aarch64_simd_xi __o;
17961 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17962 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17963 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17964 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17965 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17966 return ret;
17969 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
17970 vld4q_u32 (const uint32_t * __a)
17972 uint32x4x4_t ret;
17973 __builtin_aarch64_simd_xi __o;
17974 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17975 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17976 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17977 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17978 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17979 return ret;
17982 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
17983 vld4q_u64 (const uint64_t * __a)
17985 uint64x2x4_t ret;
17986 __builtin_aarch64_simd_xi __o;
17987 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17988 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17989 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17990 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17991 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17992 return ret;
17995 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
17996 vld4q_f32 (const float32_t * __a)
17998 float32x4x4_t ret;
17999 __builtin_aarch64_simd_xi __o;
18000 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
18001 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
18002 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
18003 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
18004 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
18005 return ret;
18008 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
18009 vld4q_f64 (const float64_t * __a)
18011 float64x2x4_t ret;
18012 __builtin_aarch64_simd_xi __o;
18013 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
18014 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
18015 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
18016 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
18017 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
18018 return ret;
18021 /* vmax */
18023 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18024 vmax_f32 (float32x2_t __a, float32x2_t __b)
18026 return __builtin_aarch64_smax_nanv2sf (__a, __b);
18029 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18030 vmax_s8 (int8x8_t __a, int8x8_t __b)
18032 return __builtin_aarch64_smaxv8qi (__a, __b);
18035 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18036 vmax_s16 (int16x4_t __a, int16x4_t __b)
18038 return __builtin_aarch64_smaxv4hi (__a, __b);
18041 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18042 vmax_s32 (int32x2_t __a, int32x2_t __b)
18044 return __builtin_aarch64_smaxv2si (__a, __b);
18047 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18048 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
18050 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
18051 (int8x8_t) __b);
18054 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18055 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
18057 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
18058 (int16x4_t) __b);
18061 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18062 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
18064 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
18065 (int32x2_t) __b);
18068 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18069 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
18071 return __builtin_aarch64_smax_nanv4sf (__a, __b);
18074 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18075 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
18077 return __builtin_aarch64_smax_nanv2df (__a, __b);
18080 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18081 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
18083 return __builtin_aarch64_smaxv16qi (__a, __b);
18086 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18087 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
18089 return __builtin_aarch64_smaxv8hi (__a, __b);
18092 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18093 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
18095 return __builtin_aarch64_smaxv4si (__a, __b);
18098 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18099 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
18101 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
18102 (int8x16_t) __b);
18105 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18106 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
18108 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
18109 (int16x8_t) __b);
18112 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18113 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
18115 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
18116 (int32x4_t) __b);
18119 /* vmaxnm */
18121 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18122 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
18124 return __builtin_aarch64_smaxv2sf (__a, __b);
18127 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18128 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
18130 return __builtin_aarch64_smaxv4sf (__a, __b);
18133 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18134 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
18136 return __builtin_aarch64_smaxv2df (__a, __b);
18139 /* vmaxv */
18141 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18142 vmaxv_f32 (float32x2_t __a)
18144 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
18148 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18149 vmaxv_s8 (int8x8_t __a)
18151 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
18154 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18155 vmaxv_s16 (int16x4_t __a)
18157 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
18160 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18161 vmaxv_s32 (int32x2_t __a)
18163 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
18166 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18167 vmaxv_u8 (uint8x8_t __a)
18169 return vget_lane_u8 ((uint8x8_t)
18170 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
18174 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18175 vmaxv_u16 (uint16x4_t __a)
18177 return vget_lane_u16 ((uint16x4_t)
18178 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
18182 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18183 vmaxv_u32 (uint32x2_t __a)
18185 return vget_lane_u32 ((uint32x2_t)
18186 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
18190 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18191 vmaxvq_f32 (float32x4_t __a)
18193 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
18197 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18198 vmaxvq_f64 (float64x2_t __a)
18200 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
18204 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18205 vmaxvq_s8 (int8x16_t __a)
18207 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
18210 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18211 vmaxvq_s16 (int16x8_t __a)
18213 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
18216 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18217 vmaxvq_s32 (int32x4_t __a)
18219 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
18222 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18223 vmaxvq_u8 (uint8x16_t __a)
18225 return vgetq_lane_u8 ((uint8x16_t)
18226 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
18230 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18231 vmaxvq_u16 (uint16x8_t __a)
18233 return vgetq_lane_u16 ((uint16x8_t)
18234 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
18238 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18239 vmaxvq_u32 (uint32x4_t __a)
18241 return vgetq_lane_u32 ((uint32x4_t)
18242 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
18246 /* vmaxnmv */
18248 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18249 vmaxnmv_f32 (float32x2_t __a)
18251 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
18255 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18256 vmaxnmvq_f32 (float32x4_t __a)
18258 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
18261 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18262 vmaxnmvq_f64 (float64x2_t __a)
18264 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
18267 /* vmin */
18269 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18270 vmin_f32 (float32x2_t __a, float32x2_t __b)
18272 return __builtin_aarch64_smin_nanv2sf (__a, __b);
18275 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18276 vmin_s8 (int8x8_t __a, int8x8_t __b)
18278 return __builtin_aarch64_sminv8qi (__a, __b);
18281 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18282 vmin_s16 (int16x4_t __a, int16x4_t __b)
18284 return __builtin_aarch64_sminv4hi (__a, __b);
18287 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18288 vmin_s32 (int32x2_t __a, int32x2_t __b)
18290 return __builtin_aarch64_sminv2si (__a, __b);
18293 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18294 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
18296 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
18297 (int8x8_t) __b);
18300 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18301 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
18303 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
18304 (int16x4_t) __b);
18307 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18308 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
18310 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
18311 (int32x2_t) __b);
18314 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18315 vminq_f32 (float32x4_t __a, float32x4_t __b)
18317 return __builtin_aarch64_smin_nanv4sf (__a, __b);
18320 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18321 vminq_f64 (float64x2_t __a, float64x2_t __b)
18323 return __builtin_aarch64_smin_nanv2df (__a, __b);
18326 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18327 vminq_s8 (int8x16_t __a, int8x16_t __b)
18329 return __builtin_aarch64_sminv16qi (__a, __b);
18332 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18333 vminq_s16 (int16x8_t __a, int16x8_t __b)
18335 return __builtin_aarch64_sminv8hi (__a, __b);
18338 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18339 vminq_s32 (int32x4_t __a, int32x4_t __b)
18341 return __builtin_aarch64_sminv4si (__a, __b);
18344 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18345 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
18347 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
18348 (int8x16_t) __b);
18351 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18352 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
18354 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
18355 (int16x8_t) __b);
18358 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18359 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
18361 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
18362 (int32x4_t) __b);
18365 /* vminnm */
18367 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18368 vminnm_f32 (float32x2_t __a, float32x2_t __b)
18370 return __builtin_aarch64_sminv2sf (__a, __b);
18373 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18374 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
18376 return __builtin_aarch64_sminv4sf (__a, __b);
18379 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18380 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
18382 return __builtin_aarch64_sminv2df (__a, __b);
18385 /* vminv */
18387 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18388 vminv_f32 (float32x2_t __a)
18390 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
18394 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18395 vminv_s8 (int8x8_t __a)
18397 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
18401 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18402 vminv_s16 (int16x4_t __a)
18404 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
18407 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18408 vminv_s32 (int32x2_t __a)
18410 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
18413 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18414 vminv_u8 (uint8x8_t __a)
18416 return vget_lane_u8 ((uint8x8_t)
18417 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
18421 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18422 vminv_u16 (uint16x4_t __a)
18424 return vget_lane_u16 ((uint16x4_t)
18425 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
18429 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18430 vminv_u32 (uint32x2_t __a)
18432 return vget_lane_u32 ((uint32x2_t)
18433 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
18437 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18438 vminvq_f32 (float32x4_t __a)
18440 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
18444 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18445 vminvq_f64 (float64x2_t __a)
18447 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
18451 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18452 vminvq_s8 (int8x16_t __a)
18454 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
18457 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18458 vminvq_s16 (int16x8_t __a)
18460 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
18463 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18464 vminvq_s32 (int32x4_t __a)
18466 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
18469 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18470 vminvq_u8 (uint8x16_t __a)
18472 return vgetq_lane_u8 ((uint8x16_t)
18473 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
18477 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18478 vminvq_u16 (uint16x8_t __a)
18480 return vgetq_lane_u16 ((uint16x8_t)
18481 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
18485 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18486 vminvq_u32 (uint32x4_t __a)
18488 return vgetq_lane_u32 ((uint32x4_t)
18489 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
18493 /* vminnmv */
18495 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18496 vminnmv_f32 (float32x2_t __a)
18498 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
18501 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18502 vminnmvq_f32 (float32x4_t __a)
18504 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
18507 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18508 vminnmvq_f64 (float64x2_t __a)
18510 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
18513 /* vmla */
18515 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18516 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18518 return a + b * c;
18521 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18522 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18524 return __a + __b * __c;
18527 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18528 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18530 return a + b * c;
18533 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18534 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18536 return a + b * c;
18539 /* vmla_lane */
18541 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18542 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
18543 float32x2_t __c, const int __lane)
18545 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18548 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18549 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
18550 int16x4_t __c, const int __lane)
18552 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18555 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18556 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
18557 int32x2_t __c, const int __lane)
18559 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18562 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18563 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18564 uint16x4_t __c, const int __lane)
18566 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18569 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18570 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18571 uint32x2_t __c, const int __lane)
18573 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18576 /* vmla_laneq */
18578 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18579 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
18580 float32x4_t __c, const int __lane)
18582 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18585 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18586 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
18587 int16x8_t __c, const int __lane)
18589 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18592 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18593 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
18594 int32x4_t __c, const int __lane)
18596 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18599 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18600 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18601 uint16x8_t __c, const int __lane)
18603 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18606 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18607 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18608 uint32x4_t __c, const int __lane)
18610 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18613 /* vmlaq_lane */
18615 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18616 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18617 float32x2_t __c, const int __lane)
18619 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18622 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18623 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
18624 int16x4_t __c, const int __lane)
18626 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18629 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18630 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
18631 int32x2_t __c, const int __lane)
18633 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18637 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18638 uint16x4_t __c, const int __lane)
18640 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18643 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18644 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18645 uint32x2_t __c, const int __lane)
18647 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18650 /* vmlaq_laneq */
18652 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18653 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18654 float32x4_t __c, const int __lane)
18656 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18659 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18660 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18661 int16x8_t __c, const int __lane)
18663 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18666 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18667 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18668 int32x4_t __c, const int __lane)
18670 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18673 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18674 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18675 uint16x8_t __c, const int __lane)
18677 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18680 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18681 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18682 uint32x4_t __c, const int __lane)
18684 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18687 /* vmls */
18689 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18690 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18692 return a - b * c;
18695 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18696 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18698 return __a - __b * __c;
18701 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18702 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18704 return a - b * c;
18707 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18708 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18710 return a - b * c;
18713 /* vmls_lane */
18715 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18716 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
18717 float32x2_t __c, const int __lane)
18719 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18722 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18723 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
18724 int16x4_t __c, const int __lane)
18726 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18729 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18730 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
18731 int32x2_t __c, const int __lane)
18733 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18736 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18737 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18738 uint16x4_t __c, const int __lane)
18740 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18743 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18744 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18745 uint32x2_t __c, const int __lane)
18747 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18750 /* vmls_laneq */
18752 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18753 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
18754 float32x4_t __c, const int __lane)
18756 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18759 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18760 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
18761 int16x8_t __c, const int __lane)
18763 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18766 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18767 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
18768 int32x4_t __c, const int __lane)
18770 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18773 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18774 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18775 uint16x8_t __c, const int __lane)
18777 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18780 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18781 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18782 uint32x4_t __c, const int __lane)
18784 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18787 /* vmlsq_lane */
18789 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18790 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18791 float32x2_t __c, const int __lane)
18793 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18796 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18797 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
18798 int16x4_t __c, const int __lane)
18800 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18803 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18804 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
18805 int32x2_t __c, const int __lane)
18807 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18810 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18811 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18812 uint16x4_t __c, const int __lane)
18814 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18817 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18818 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18819 uint32x2_t __c, const int __lane)
18821 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18824 /* vmlsq_laneq */
18826 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18827 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18828 float32x4_t __c, const int __lane)
18830 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18833 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18834 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18835 int16x8_t __c, const int __lane)
18837 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18840 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18841 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18842 int32x4_t __c, const int __lane)
18844 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18846 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18847 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18848 uint16x8_t __c, const int __lane)
18850 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18853 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18854 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18855 uint32x4_t __c, const int __lane)
18857 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18860 /* vmov_n_ */
18862 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18863 vmov_n_f32 (float32_t __a)
18865 return vdup_n_f32 (__a);
18868 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18869 vmov_n_f64 (float64_t __a)
18871 return (float64x1_t) {__a};
18874 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18875 vmov_n_p8 (poly8_t __a)
18877 return vdup_n_p8 (__a);
18880 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18881 vmov_n_p16 (poly16_t __a)
18883 return vdup_n_p16 (__a);
18886 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18887 vmov_n_s8 (int8_t __a)
18889 return vdup_n_s8 (__a);
18892 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18893 vmov_n_s16 (int16_t __a)
18895 return vdup_n_s16 (__a);
18898 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18899 vmov_n_s32 (int32_t __a)
18901 return vdup_n_s32 (__a);
18904 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18905 vmov_n_s64 (int64_t __a)
18907 return (int64x1_t) {__a};
18910 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18911 vmov_n_u8 (uint8_t __a)
18913 return vdup_n_u8 (__a);
18916 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18917 vmov_n_u16 (uint16_t __a)
18919 return vdup_n_u16 (__a);
18922 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18923 vmov_n_u32 (uint32_t __a)
18925 return vdup_n_u32 (__a);
18928 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18929 vmov_n_u64 (uint64_t __a)
18931 return (uint64x1_t) {__a};
18934 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18935 vmovq_n_f32 (float32_t __a)
18937 return vdupq_n_f32 (__a);
18940 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18941 vmovq_n_f64 (float64_t __a)
18943 return vdupq_n_f64 (__a);
18946 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18947 vmovq_n_p8 (poly8_t __a)
18949 return vdupq_n_p8 (__a);
18952 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18953 vmovq_n_p16 (poly16_t __a)
18955 return vdupq_n_p16 (__a);
18958 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18959 vmovq_n_s8 (int8_t __a)
18961 return vdupq_n_s8 (__a);
18964 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18965 vmovq_n_s16 (int16_t __a)
18967 return vdupq_n_s16 (__a);
18970 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18971 vmovq_n_s32 (int32_t __a)
18973 return vdupq_n_s32 (__a);
18976 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18977 vmovq_n_s64 (int64_t __a)
18979 return vdupq_n_s64 (__a);
18982 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18983 vmovq_n_u8 (uint8_t __a)
18985 return vdupq_n_u8 (__a);
18988 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18989 vmovq_n_u16 (uint16_t __a)
18991 return vdupq_n_u16 (__a);
18994 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18995 vmovq_n_u32 (uint32_t __a)
18997 return vdupq_n_u32 (__a);
19000 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19001 vmovq_n_u64 (uint64_t __a)
19003 return vdupq_n_u64 (__a);
19006 /* vmul_lane */
19008 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19009 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
19011 return __a * __aarch64_vget_lane_f32 (__b, __lane);
19014 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19015 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
19017 return __a * __b;
19020 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19021 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
19023 return __a * __aarch64_vget_lane_s16 (__b, __lane);
19026 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19027 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
19029 return __a * __aarch64_vget_lane_s32 (__b, __lane);
19032 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19033 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
19035 return __a * __aarch64_vget_lane_u16 (__b, __lane);
19038 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19039 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
19041 return __a * __aarch64_vget_lane_u32 (__b, __lane);
19044 /* vmul_laneq */
19046 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19047 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
19049 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
19052 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19053 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
19055 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
19058 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19059 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
19061 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
19064 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19065 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
19067 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
19070 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19071 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
19073 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
19076 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19077 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
19079 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
19082 /* vmulq_lane */
19084 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19085 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
19087 return __a * __aarch64_vget_lane_f32 (__b, __lane);
19090 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19091 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
19093 __builtin_aarch64_im_lane_boundsi (__lane, 1);
19094 return __a * __b[0];
19097 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19098 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
19100 return __a * __aarch64_vget_lane_s16 (__b, __lane);
19103 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19104 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
19106 return __a * __aarch64_vget_lane_s32 (__b, __lane);
19109 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19110 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
19112 return __a * __aarch64_vget_lane_u16 (__b, __lane);
19115 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19116 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
19118 return __a * __aarch64_vget_lane_u32 (__b, __lane);
19121 /* vmulq_laneq */
19123 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19124 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
19126 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
19129 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19130 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
19132 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
19135 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19136 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
19138 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
19141 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19142 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
19144 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
19147 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19148 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
19150 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
19153 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19154 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
19156 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
19159 /* vneg */
19161 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19162 vneg_f32 (float32x2_t __a)
19164 return -__a;
19167 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19168 vneg_f64 (float64x1_t __a)
19170 return -__a;
19173 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19174 vneg_s8 (int8x8_t __a)
19176 return -__a;
19179 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19180 vneg_s16 (int16x4_t __a)
19182 return -__a;
19185 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19186 vneg_s32 (int32x2_t __a)
19188 return -__a;
19191 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19192 vneg_s64 (int64x1_t __a)
19194 return -__a;
19197 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19198 vnegq_f32 (float32x4_t __a)
19200 return -__a;
19203 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19204 vnegq_f64 (float64x2_t __a)
19206 return -__a;
19209 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19210 vnegq_s8 (int8x16_t __a)
19212 return -__a;
19215 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19216 vnegq_s16 (int16x8_t __a)
19218 return -__a;
19221 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19222 vnegq_s32 (int32x4_t __a)
19224 return -__a;
19227 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19228 vnegq_s64 (int64x2_t __a)
19230 return -__a;
19233 /* vqabs */
19235 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19236 vqabsq_s64 (int64x2_t __a)
19238 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
19241 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19242 vqabsb_s8 (int8x1_t __a)
19244 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
19247 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19248 vqabsh_s16 (int16x1_t __a)
19250 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
19253 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19254 vqabss_s32 (int32x1_t __a)
19256 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
19259 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19260 vqabsd_s64 (int64_t __a)
19262 return __builtin_aarch64_sqabsdi (__a);
19265 /* vqadd */
19267 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19268 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
19270 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
19273 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19274 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
19276 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
19279 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19280 vqadds_s32 (int32x1_t __a, int32x1_t __b)
19282 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
19285 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19286 vqaddd_s64 (int64_t __a, int64_t __b)
19288 return __builtin_aarch64_sqadddi (__a, __b);
19291 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
19292 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
19294 return (uint8x1_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
19297 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
19298 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
19300 return (uint16x1_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
19303 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
19304 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
19306 return (uint32x1_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
19309 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19310 vqaddd_u64 (uint64_t __a, uint64_t __b)
19312 return __builtin_aarch64_uqadddi_uuu (__a, __b);
19315 /* vqdmlal */
19317 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19318 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19320 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
19323 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19324 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19326 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
19329 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19330 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19331 int const __d)
19333 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
19336 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19337 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19338 int const __d)
19340 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
19343 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19344 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19346 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
19349 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19350 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19352 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
19355 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19356 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19358 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
19361 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19362 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19364 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
19367 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19368 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19370 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
19373 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19374 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19376 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
19379 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19380 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19381 int const __d)
19383 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
19386 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19387 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19388 int const __d)
19390 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
19393 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19394 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19396 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
19399 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19400 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19402 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
19405 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19406 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19408 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
19411 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19412 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19414 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
19417 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19418 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
19420 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
19423 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19424 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
19426 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
19429 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19430 vqdmlals_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
19432 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
19435 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19436 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
19438 return (int64x1_t)
19439 {__builtin_aarch64_sqdmlal_lanesi (__a[0], __b, __c, __d)};
19442 /* vqdmlsl */
19444 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19445 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19447 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
19450 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19451 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19453 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
19456 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19457 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19458 int const __d)
19460 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
19463 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19464 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19465 int const __d)
19467 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
19470 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19471 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19473 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
19476 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19477 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19479 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
19482 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19483 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19485 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
19488 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19489 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19491 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
19494 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19495 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19497 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
19500 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19501 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19503 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
19506 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19507 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19508 int const __d)
19510 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
19513 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19514 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19515 int const __d)
19517 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
19520 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19521 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19523 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
19526 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19527 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19529 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
19532 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19533 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19535 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
19538 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19539 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19541 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
19544 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19545 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
19547 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
19550 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19551 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
19553 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
19556 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19557 vqdmlsls_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
19559 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
19562 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19563 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
19565 return (int64x1_t) {__builtin_aarch64_sqdmlsl_lanesi (__a[0], __b, __c, __d)};
19568 /* vqdmulh */
19570 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19571 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19573 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
19576 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19577 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19579 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
19582 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19583 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19585 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
19588 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19589 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19591 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
19594 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19595 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
19597 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
19600 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19601 vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
19603 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
19606 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19607 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
19609 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
19612 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19613 vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
19615 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
19618 /* vqdmull */
19620 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19621 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
19623 return __builtin_aarch64_sqdmullv4hi (__a, __b);
19626 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19627 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
19629 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
19632 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19633 vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
19635 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
19638 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19639 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
19641 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
19644 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19645 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
19647 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
19650 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19651 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
19653 return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
19656 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19657 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
19659 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
19662 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19663 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
19665 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
19668 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19669 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
19671 return __builtin_aarch64_sqdmullv2si (__a, __b);
19674 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19675 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
19677 return __builtin_aarch64_sqdmull2v4si (__a, __b);
19680 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19681 vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
19683 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
19686 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19687 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
19689 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
19692 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19693 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
19695 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
19698 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19699 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
19701 return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
19704 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19705 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
19707 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
19710 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19711 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
19713 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
19716 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19717 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
19719 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
19722 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19723 vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
19725 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
19728 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19729 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
19731 return __builtin_aarch64_sqdmullsi (__a, __b);
19734 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19735 vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
19737 return (int64x1_t) {__builtin_aarch64_sqdmull_lanesi (__a, __b, __c)};
19740 /* vqmovn */
19742 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19743 vqmovn_s16 (int16x8_t __a)
19745 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
19748 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19749 vqmovn_s32 (int32x4_t __a)
19751 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
19754 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19755 vqmovn_s64 (int64x2_t __a)
19757 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
19760 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19761 vqmovn_u16 (uint16x8_t __a)
19763 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
19766 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19767 vqmovn_u32 (uint32x4_t __a)
19769 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
19772 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19773 vqmovn_u64 (uint64x2_t __a)
19775 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
19778 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19779 vqmovnh_s16 (int16x1_t __a)
19781 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
19784 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19785 vqmovns_s32 (int32x1_t __a)
19787 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
19790 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19791 vqmovnd_s64 (int64_t __a)
19793 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
19796 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
19797 vqmovnh_u16 (uint16x1_t __a)
19799 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
19802 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
19803 vqmovns_u32 (uint32x1_t __a)
19805 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
19808 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
19809 vqmovnd_u64 (uint64_t __a)
19811 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
19814 /* vqmovun */
19816 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19817 vqmovun_s16 (int16x8_t __a)
19819 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
19822 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19823 vqmovun_s32 (int32x4_t __a)
19825 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
19828 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19829 vqmovun_s64 (int64x2_t __a)
19831 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
19834 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19835 vqmovunh_s16 (int16x1_t __a)
19837 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
19840 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19841 vqmovuns_s32 (int32x1_t __a)
19843 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
19846 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19847 vqmovund_s64 (int64_t __a)
19849 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
19852 /* vqneg */
19854 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19855 vqnegq_s64 (int64x2_t __a)
19857 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
19860 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19861 vqnegb_s8 (int8x1_t __a)
19863 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
19866 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19867 vqnegh_s16 (int16x1_t __a)
19869 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
19872 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19873 vqnegs_s32 (int32x1_t __a)
19875 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
19878 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19879 vqnegd_s64 (int64_t __a)
19881 return __builtin_aarch64_sqnegdi (__a);
19884 /* vqrdmulh */
19886 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19887 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19889 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
19892 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19893 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19895 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
19898 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19899 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19901 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
19904 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19905 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19907 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
19910 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19911 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
19913 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
19916 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19917 vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
19919 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
19922 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19923 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
19925 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
19928 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19929 vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
19931 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
19934 /* vqrshl */
19936 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19937 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
19939 return __builtin_aarch64_sqrshlv8qi (__a, __b);
19942 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19943 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
19945 return __builtin_aarch64_sqrshlv4hi (__a, __b);
19948 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19949 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
19951 return __builtin_aarch64_sqrshlv2si (__a, __b);
19954 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19955 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
19957 return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
19960 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19961 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
19963 return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
19966 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19967 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
19969 return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
19972 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19973 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
19975 return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
19978 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19979 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
19981 return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
19984 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19985 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
19987 return __builtin_aarch64_sqrshlv16qi (__a, __b);
19990 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19991 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
19993 return __builtin_aarch64_sqrshlv8hi (__a, __b);
19996 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19997 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
19999 return __builtin_aarch64_sqrshlv4si (__a, __b);
20002 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20003 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
20005 return __builtin_aarch64_sqrshlv2di (__a, __b);
20008 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20009 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
20011 return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
20014 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20015 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
20017 return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
20020 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20021 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
20023 return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
20026 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20027 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
20029 return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
20032 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20033 vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
20035 return __builtin_aarch64_sqrshlqi (__a, __b);
20038 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20039 vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
20041 return __builtin_aarch64_sqrshlhi (__a, __b);
20044 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20045 vqrshls_s32 (int32x1_t __a, int32x1_t __b)
20047 return __builtin_aarch64_sqrshlsi (__a, __b);
20050 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20051 vqrshld_s64 (int64_t __a, int64_t __b)
20053 return __builtin_aarch64_sqrshldi (__a, __b);
20056 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20057 vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
20059 return __builtin_aarch64_uqrshlqi_uus (__a, __b);
20062 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20063 vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
20065 return __builtin_aarch64_uqrshlhi_uus (__a, __b);
20068 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20069 vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
20071 return __builtin_aarch64_uqrshlsi_uus (__a, __b);
20074 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20075 vqrshld_u64 (uint64_t __a, uint64_t __b)
20077 return __builtin_aarch64_uqrshldi_uus (__a, __b);
20080 /* vqrshrn */
20082 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20083 vqrshrn_n_s16 (int16x8_t __a, const int __b)
20085 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
20088 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20089 vqrshrn_n_s32 (int32x4_t __a, const int __b)
20091 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
20094 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20095 vqrshrn_n_s64 (int64x2_t __a, const int __b)
20097 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
20100 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20101 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
20103 return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
20106 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20107 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
20109 return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
20112 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20113 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
20115 return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
20118 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20119 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
20121 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
20124 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20125 vqrshrns_n_s32 (int32x1_t __a, const int __b)
20127 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
20130 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20131 vqrshrnd_n_s64 (int64_t __a, const int __b)
20133 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
20136 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20137 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
20139 return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
20142 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20143 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
20145 return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
20148 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20149 vqrshrnd_n_u64 (uint64_t __a, const int __b)
20151 return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
20154 /* vqrshrun */
20156 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20157 vqrshrun_n_s16 (int16x8_t __a, const int __b)
20159 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
20162 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20163 vqrshrun_n_s32 (int32x4_t __a, const int __b)
20165 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
20168 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20169 vqrshrun_n_s64 (int64x2_t __a, const int __b)
20171 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
20174 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20175 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
20177 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
20180 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20181 vqrshruns_n_s32 (int32x1_t __a, const int __b)
20183 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
20186 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20187 vqrshrund_n_s64 (int64_t __a, const int __b)
20189 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
20192 /* vqshl */
20194 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20195 vqshl_s8 (int8x8_t __a, int8x8_t __b)
20197 return __builtin_aarch64_sqshlv8qi (__a, __b);
20200 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20201 vqshl_s16 (int16x4_t __a, int16x4_t __b)
20203 return __builtin_aarch64_sqshlv4hi (__a, __b);
20206 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20207 vqshl_s32 (int32x2_t __a, int32x2_t __b)
20209 return __builtin_aarch64_sqshlv2si (__a, __b);
20212 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20213 vqshl_s64 (int64x1_t __a, int64x1_t __b)
20215 return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
20218 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20219 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
20221 return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
20224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20225 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
20227 return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
20230 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20231 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
20233 return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
20236 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20237 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
20239 return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
20242 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20243 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
20245 return __builtin_aarch64_sqshlv16qi (__a, __b);
20248 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20249 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
20251 return __builtin_aarch64_sqshlv8hi (__a, __b);
20254 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20255 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
20257 return __builtin_aarch64_sqshlv4si (__a, __b);
20260 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20261 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
20263 return __builtin_aarch64_sqshlv2di (__a, __b);
20266 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20267 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
20269 return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
20272 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20273 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
20275 return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
20278 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20279 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
20281 return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
20284 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20285 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
20287 return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
20290 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20291 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
20293 return __builtin_aarch64_sqshlqi (__a, __b);
20296 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20297 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
20299 return __builtin_aarch64_sqshlhi (__a, __b);
20302 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20303 vqshls_s32 (int32x1_t __a, int32x1_t __b)
20305 return __builtin_aarch64_sqshlsi (__a, __b);
20308 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20309 vqshld_s64 (int64_t __a, int64_t __b)
20311 return __builtin_aarch64_sqshldi (__a, __b);
20314 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20315 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
20317 return __builtin_aarch64_uqshlqi_uus (__a, __b);
20320 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20321 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
20323 return __builtin_aarch64_uqshlhi_uus (__a, __b);
20326 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20327 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
20329 return __builtin_aarch64_uqshlsi_uus (__a, __b);
20332 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20333 vqshld_u64 (uint64_t __a, uint64_t __b)
20335 return __builtin_aarch64_uqshldi_uus (__a, __b);
20338 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20339 vqshl_n_s8 (int8x8_t __a, const int __b)
20341 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
20344 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20345 vqshl_n_s16 (int16x4_t __a, const int __b)
20347 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
20350 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20351 vqshl_n_s32 (int32x2_t __a, const int __b)
20353 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
20356 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20357 vqshl_n_s64 (int64x1_t __a, const int __b)
20359 return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
20362 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20363 vqshl_n_u8 (uint8x8_t __a, const int __b)
20365 return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
20368 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20369 vqshl_n_u16 (uint16x4_t __a, const int __b)
20371 return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
20374 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20375 vqshl_n_u32 (uint32x2_t __a, const int __b)
20377 return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
20380 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20381 vqshl_n_u64 (uint64x1_t __a, const int __b)
20383 return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
20386 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20387 vqshlq_n_s8 (int8x16_t __a, const int __b)
20389 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
20392 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20393 vqshlq_n_s16 (int16x8_t __a, const int __b)
20395 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
20398 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20399 vqshlq_n_s32 (int32x4_t __a, const int __b)
20401 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
20404 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20405 vqshlq_n_s64 (int64x2_t __a, const int __b)
20407 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
20410 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20411 vqshlq_n_u8 (uint8x16_t __a, const int __b)
20413 return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
20416 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20417 vqshlq_n_u16 (uint16x8_t __a, const int __b)
20419 return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
20422 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20423 vqshlq_n_u32 (uint32x4_t __a, const int __b)
20425 return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
20428 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20429 vqshlq_n_u64 (uint64x2_t __a, const int __b)
20431 return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
20434 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20435 vqshlb_n_s8 (int8x1_t __a, const int __b)
20437 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
20440 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20441 vqshlh_n_s16 (int16x1_t __a, const int __b)
20443 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
20446 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20447 vqshls_n_s32 (int32x1_t __a, const int __b)
20449 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
20452 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20453 vqshld_n_s64 (int64_t __a, const int __b)
20455 return __builtin_aarch64_sqshl_ndi (__a, __b);
20458 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20459 vqshlb_n_u8 (uint8x1_t __a, const int __b)
20461 return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
20464 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20465 vqshlh_n_u16 (uint16x1_t __a, const int __b)
20467 return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
20470 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20471 vqshls_n_u32 (uint32x1_t __a, const int __b)
20473 return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
20476 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20477 vqshld_n_u64 (uint64_t __a, const int __b)
20479 return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
20482 /* vqshlu */
20484 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20485 vqshlu_n_s8 (int8x8_t __a, const int __b)
20487 return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
20490 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20491 vqshlu_n_s16 (int16x4_t __a, const int __b)
20493 return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
20496 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20497 vqshlu_n_s32 (int32x2_t __a, const int __b)
20499 return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
20502 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20503 vqshlu_n_s64 (int64x1_t __a, const int __b)
20505 return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
20508 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20509 vqshluq_n_s8 (int8x16_t __a, const int __b)
20511 return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
20514 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20515 vqshluq_n_s16 (int16x8_t __a, const int __b)
20517 return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
20520 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20521 vqshluq_n_s32 (int32x4_t __a, const int __b)
20523 return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
20526 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20527 vqshluq_n_s64 (int64x2_t __a, const int __b)
20529 return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
20532 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20533 vqshlub_n_s8 (int8x1_t __a, const int __b)
20535 return (int8x1_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
20538 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20539 vqshluh_n_s16 (int16x1_t __a, const int __b)
20541 return (int16x1_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
20544 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20545 vqshlus_n_s32 (int32x1_t __a, const int __b)
20547 return (int32x1_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
20550 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20551 vqshlud_n_s64 (int64_t __a, const int __b)
20553 return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
20556 /* vqshrn */
20558 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20559 vqshrn_n_s16 (int16x8_t __a, const int __b)
20561 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
20564 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20565 vqshrn_n_s32 (int32x4_t __a, const int __b)
20567 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
20570 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20571 vqshrn_n_s64 (int64x2_t __a, const int __b)
20573 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
20576 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20577 vqshrn_n_u16 (uint16x8_t __a, const int __b)
20579 return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
20582 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20583 vqshrn_n_u32 (uint32x4_t __a, const int __b)
20585 return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
20588 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20589 vqshrn_n_u64 (uint64x2_t __a, const int __b)
20591 return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
20594 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20595 vqshrnh_n_s16 (int16x1_t __a, const int __b)
20597 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
20600 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20601 vqshrns_n_s32 (int32x1_t __a, const int __b)
20603 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
20606 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20607 vqshrnd_n_s64 (int64_t __a, const int __b)
20609 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
20612 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20613 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
20615 return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
20618 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20619 vqshrns_n_u32 (uint32x1_t __a, const int __b)
20621 return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
20624 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20625 vqshrnd_n_u64 (uint64_t __a, const int __b)
20627 return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
20630 /* vqshrun */
20632 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20633 vqshrun_n_s16 (int16x8_t __a, const int __b)
20635 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
20638 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20639 vqshrun_n_s32 (int32x4_t __a, const int __b)
20641 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
20644 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20645 vqshrun_n_s64 (int64x2_t __a, const int __b)
20647 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
20650 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20651 vqshrunh_n_s16 (int16x1_t __a, const int __b)
20653 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
20656 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20657 vqshruns_n_s32 (int32x1_t __a, const int __b)
20659 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
20662 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20663 vqshrund_n_s64 (int64_t __a, const int __b)
20665 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
20668 /* vqsub */
20670 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20671 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
20673 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
20676 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20677 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
20679 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
20682 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20683 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
20685 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
20688 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20689 vqsubd_s64 (int64_t __a, int64_t __b)
20691 return __builtin_aarch64_sqsubdi (__a, __b);
20694 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20695 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
20697 return (uint8x1_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
20700 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20701 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
20703 return (uint16x1_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
20706 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20707 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
20709 return (uint32x1_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
20712 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20713 vqsubd_u64 (uint64_t __a, uint64_t __b)
20715 return __builtin_aarch64_uqsubdi_uuu (__a, __b);
20718 /* vrecpe */
20720 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20721 vrecpes_f32 (float32_t __a)
20723 return __builtin_aarch64_frecpesf (__a);
20726 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20727 vrecped_f64 (float64_t __a)
20729 return __builtin_aarch64_frecpedf (__a);
20732 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20733 vrecpe_f32 (float32x2_t __a)
20735 return __builtin_aarch64_frecpev2sf (__a);
20738 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20739 vrecpeq_f32 (float32x4_t __a)
20741 return __builtin_aarch64_frecpev4sf (__a);
20744 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20745 vrecpeq_f64 (float64x2_t __a)
20747 return __builtin_aarch64_frecpev2df (__a);
20750 /* vrecps */
20752 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20753 vrecpss_f32 (float32_t __a, float32_t __b)
20755 return __builtin_aarch64_frecpssf (__a, __b);
20758 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20759 vrecpsd_f64 (float64_t __a, float64_t __b)
20761 return __builtin_aarch64_frecpsdf (__a, __b);
20764 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20765 vrecps_f32 (float32x2_t __a, float32x2_t __b)
20767 return __builtin_aarch64_frecpsv2sf (__a, __b);
20770 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20771 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
20773 return __builtin_aarch64_frecpsv4sf (__a, __b);
20776 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20777 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
20779 return __builtin_aarch64_frecpsv2df (__a, __b);
20782 /* vrecpx */
20784 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20785 vrecpxs_f32 (float32_t __a)
20787 return __builtin_aarch64_frecpxsf (__a);
20790 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20791 vrecpxd_f64 (float64_t __a)
20793 return __builtin_aarch64_frecpxdf (__a);
20797 /* vrev */
20799 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20800 vrev16_p8 (poly8x8_t a)
20802 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20805 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20806 vrev16_s8 (int8x8_t a)
20808 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20811 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20812 vrev16_u8 (uint8x8_t a)
20814 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20817 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20818 vrev16q_p8 (poly8x16_t a)
20820 return __builtin_shuffle (a,
20821 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20824 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20825 vrev16q_s8 (int8x16_t a)
20827 return __builtin_shuffle (a,
20828 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20831 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20832 vrev16q_u8 (uint8x16_t a)
20834 return __builtin_shuffle (a,
20835 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20838 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20839 vrev32_p8 (poly8x8_t a)
20841 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20844 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20845 vrev32_p16 (poly16x4_t a)
20847 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20850 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20851 vrev32_s8 (int8x8_t a)
20853 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20856 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20857 vrev32_s16 (int16x4_t a)
20859 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20862 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20863 vrev32_u8 (uint8x8_t a)
20865 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20868 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20869 vrev32_u16 (uint16x4_t a)
20871 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20874 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20875 vrev32q_p8 (poly8x16_t a)
20877 return __builtin_shuffle (a,
20878 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20881 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20882 vrev32q_p16 (poly16x8_t a)
20884 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20887 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20888 vrev32q_s8 (int8x16_t a)
20890 return __builtin_shuffle (a,
20891 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20894 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20895 vrev32q_s16 (int16x8_t a)
20897 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20900 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20901 vrev32q_u8 (uint8x16_t a)
20903 return __builtin_shuffle (a,
20904 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20907 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20908 vrev32q_u16 (uint16x8_t a)
20910 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20913 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20914 vrev64_f32 (float32x2_t a)
20916 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20919 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20920 vrev64_p8 (poly8x8_t a)
20922 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20925 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20926 vrev64_p16 (poly16x4_t a)
20928 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20931 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20932 vrev64_s8 (int8x8_t a)
20934 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20937 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20938 vrev64_s16 (int16x4_t a)
20940 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20943 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20944 vrev64_s32 (int32x2_t a)
20946 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20949 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20950 vrev64_u8 (uint8x8_t a)
20952 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20955 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20956 vrev64_u16 (uint16x4_t a)
20958 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20961 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20962 vrev64_u32 (uint32x2_t a)
20964 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20967 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20968 vrev64q_f32 (float32x4_t a)
20970 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
20973 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20974 vrev64q_p8 (poly8x16_t a)
20976 return __builtin_shuffle (a,
20977 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20980 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20981 vrev64q_p16 (poly16x8_t a)
20983 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20986 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20987 vrev64q_s8 (int8x16_t a)
20989 return __builtin_shuffle (a,
20990 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20993 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20994 vrev64q_s16 (int16x8_t a)
20996 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20999 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21000 vrev64q_s32 (int32x4_t a)
21002 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21005 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21006 vrev64q_u8 (uint8x16_t a)
21008 return __builtin_shuffle (a,
21009 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21012 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21013 vrev64q_u16 (uint16x8_t a)
21015 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21018 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21019 vrev64q_u32 (uint32x4_t a)
21021 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21024 /* vrnd */
21026 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21027 vrnd_f32 (float32x2_t __a)
21029 return __builtin_aarch64_btruncv2sf (__a);
21032 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21033 vrnd_f64 (float64x1_t __a)
21035 return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
21038 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21039 vrndq_f32 (float32x4_t __a)
21041 return __builtin_aarch64_btruncv4sf (__a);
21044 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21045 vrndq_f64 (float64x2_t __a)
21047 return __builtin_aarch64_btruncv2df (__a);
21050 /* vrnda */
21052 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21053 vrnda_f32 (float32x2_t __a)
21055 return __builtin_aarch64_roundv2sf (__a);
21058 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21059 vrnda_f64 (float64x1_t __a)
21061 return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
21064 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21065 vrndaq_f32 (float32x4_t __a)
21067 return __builtin_aarch64_roundv4sf (__a);
21070 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21071 vrndaq_f64 (float64x2_t __a)
21073 return __builtin_aarch64_roundv2df (__a);
21076 /* vrndi */
21078 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21079 vrndi_f32 (float32x2_t __a)
21081 return __builtin_aarch64_nearbyintv2sf (__a);
21084 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21085 vrndi_f64 (float64x1_t __a)
21087 return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
21090 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21091 vrndiq_f32 (float32x4_t __a)
21093 return __builtin_aarch64_nearbyintv4sf (__a);
21096 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21097 vrndiq_f64 (float64x2_t __a)
21099 return __builtin_aarch64_nearbyintv2df (__a);
21102 /* vrndm */
21104 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21105 vrndm_f32 (float32x2_t __a)
21107 return __builtin_aarch64_floorv2sf (__a);
21110 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21111 vrndm_f64 (float64x1_t __a)
21113 return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
21116 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21117 vrndmq_f32 (float32x4_t __a)
21119 return __builtin_aarch64_floorv4sf (__a);
21122 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21123 vrndmq_f64 (float64x2_t __a)
21125 return __builtin_aarch64_floorv2df (__a);
/* vrndn - round each lane to the nearest integral value via the
   "frintn" builtin (round-to-nearest).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frintnv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndn_f64 (float64x1_t __a)
{
  /* Single-lane vector: use the DF-mode builtin on lane 0 and rebuild
     the vector with a compound literal.  */
  return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frintnv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndnq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frintnv2df (__a);
}
/* vrndp - round each lane toward plus infinity ("ceil" builtin
   semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  return __builtin_aarch64_ceilv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndp_f64 (float64x1_t __a)
{
  /* Single-lane vector: ceil the scalar lane and write it back.  */
  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_ceilv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndpq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_ceilv2df (__a);
}
/* vrndx - round each lane to an integral value in the current FP
   rounding mode ("rint" builtin semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  return __builtin_aarch64_rintv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndx_f64 (float64x1_t __a)
{
  /* Single-lane vector: rint the scalar lane and write it back.  */
  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_rintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndxq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_rintv2df (__a);
}
/* vrshl - rounding shift left.  Each lane of __a is shifted by the
   corresponding (signed) lane of __b.  Unsigned variants go through
   the _uus builtins, which take unsigned data and a signed shift.
   The 64x1 variants use the DI-mode scalar builtin on lane 0 and
   rebuild the one-element vector with a compound literal.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_urshlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_urshlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_urshlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_urshlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_urshlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_urshlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_urshlv2di_uus (__a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshld_u64 (uint64_t __a, int64_t __b)
{
  return __builtin_aarch64_urshldi_uus (__a, __b);
}
/* vrshr - rounding shift right by an immediate __b.  The 64x1
   variants use the DI-mode scalar builtin on lane 0 and rebuild the
   one-element vector with a compound literal.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_urshr_ndi_uus (__a, __b);
}
/* vrsra - rounding shift right by immediate __c and accumulate into
   __a.  The 64x1 variants use the DI-mode scalar builtin on lane 0
   and rebuild the one-element vector with a compound literal.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
}
#ifdef __ARM_FEATURE_CRYPTO

/* vsha1 - SHA1 hash-update intrinsics.

   Fix: parameter names in a system header must use
   implementation-reserved (double-underscore) identifiers; the
   previous plain names (hash_abcd, wk, a, b, ...) could collide with
   user-defined macros and break compilation of any file including
   <arm_neon.h>.  Only parameter names change; behavior and the
   callable interface are identical.  */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (__hash_abcd, __hash_e,
						  __wk);
}

static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (__hash_abcd, __hash_e,
						  __wk);
}

static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (__hash_abcd, __hash_e,
						  __wk);
}

/* SHA1 fixed rotate of the hash_e word.  */
static __inline uint32_t
vsha1h_u32 (uint32_t __hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (__hash_e);
}

/* SHA1 message-schedule updates.  */
static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (__w0_3, __w4_7, __w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (__tw0_3, __w12_15);
}

/* SHA256 hash-update and message-schedule intrinsics.  */
static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (__hash_abcd, __hash_efgh,
						    __wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t __hash_efgh, uint32x4_t __hash_abcd,
		uint32x4_t __wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (__hash_efgh, __hash_abcd,
						     __wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (__w0_3, __w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (__tw0_3, __w8_11,
						      __w12_15);
}

/* Polynomial (carry-less) 64x64 -> 128 multiplies.  */
static __inline poly128_t
vmull_p64 (poly64_t __a, poly64_t __b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (__a, __b);
}

/* As vmull_p64, operating on the high lanes of the arguments.  */
static __inline poly128_t
vmull_high_p64 (poly64x2_t __a, poly64x2_t __b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b);
}

#endif
/* vshl - shift left.  Immediate (_n) forms first; the unsigned
   variants have no dedicated builtin and cast through the
   corresponding signed vector type.  The 64x1 variants use the
   DI-mode scalar builtin on lane 0 and rebuild the one-element
   vector with a compound literal.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
}

/* Scalar (D-register) immediate forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_n_u64 (uint64_t __a, const int __b)
{
  return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
}
/* Register-shift forms of vshl: each lane of __a is shifted by the
   corresponding (signed) lane of __b.  Unsigned variants go through
   the _uus builtins; the 64x1 variants use the DI-mode scalar
   builtin on lane 0 and rebuild the vector with a compound
   literal.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_ushlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_ushlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_ushlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_ushlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_ushlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_ushlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_ushlv2di_uus (__a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_ushldi_uus (__a, __b);
}
/* vshll - widening shift left by immediate __b, doubling the lane
   width.  The _high forms operate on the upper half of a Q-register
   argument; the unsigned _high variants have no dedicated builtin
   and cast through the corresponding signed vector type.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
}
/* vshr - shift right by immediate __b: arithmetic (ashr) for signed
   lanes, logical (lshr) for unsigned lanes, the latter casting
   through the corresponding signed vector type.  The 64x1 and scalar
   forms use the simddi builtins, which accept the full 1..64 shift
   range.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashr_simddi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_lshr_simddi_uus (__a, __b);
}
/* vsli - shift left by immediate __c and insert: shifted bits of __b
   are merged into __a, which supplies the bits below the shift
   point.  The 64x1 variants use the DI-mode scalar builtin on lane 0
   and rebuild the one-element vector with a compound literal.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
}
/* vsqadd - unsigned saturating add of a signed value __b to an
   unsigned accumulator __a (usqadd builtins take unsigned, unsigned,
   signed).  The 64x1 variant uses the DI-mode scalar builtin on
   lane 0; the b/h/s/d forms are the single-element scalar
   variants.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_usqaddv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_usqaddv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_usqaddv2di_uus (__a, __b);
}

/* Scalar forms.  */

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_usqaddqi_uus (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_usqaddhi_uus (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_usqaddsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsqaddd_u64 (uint64_t __a, int64_t __b)
{
  return __builtin_aarch64_usqadddi_uus (__a, __b);
}
22197 /* vsqrt */
22198 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22199 vsqrt_f32 (float32x2_t a)
22201 return __builtin_aarch64_sqrtv2sf (a);
22204 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22205 vsqrtq_f32 (float32x4_t a)
22207 return __builtin_aarch64_sqrtv4sf (a);
22210 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22211 vsqrtq_f64 (float64x2_t a)
22213 return __builtin_aarch64_sqrtv2df (a);
/* vsra */

/* vsra_n: accumulating shift right (SSRA/USRA).  Each lane of __b is
   shifted right by the immediate __c and added to the corresponding
   lane of __a.  __c must be a compile-time constant.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  /* No 64x1 vector builtin: use the scalar DI builtin on lane 0 and
     rebuild the one-lane vector.  */
  return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
}

/* Scalar ("d"-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
}
/* vsri */

/* vsri_n: SRI (shift right insert).  Each lane of __b is shifted
   right by the immediate __c and inserted into the matching lane of
   __a; the high __c bits of each destination lane keep __a's original
   value.  __c must be a compile-time constant.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  /* No 64x1 vector builtin: use the scalar DI builtin on lane 0.  */
  return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
}

/* Scalar ("d"-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssri_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
}
22436 /* vst1 */
22438 __extension__ static __inline void __attribute__ ((__always_inline__))
22439 vst1_f32 (float32_t *a, float32x2_t b)
22441 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
22444 __extension__ static __inline void __attribute__ ((__always_inline__))
22445 vst1_f64 (float64_t *a, float64x1_t b)
22447 *a = b[0];
22450 __extension__ static __inline void __attribute__ ((__always_inline__))
22451 vst1_p8 (poly8_t *a, poly8x8_t b)
22453 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22454 (int8x8_t) b);
22457 __extension__ static __inline void __attribute__ ((__always_inline__))
22458 vst1_p16 (poly16_t *a, poly16x4_t b)
22460 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22461 (int16x4_t) b);
22464 __extension__ static __inline void __attribute__ ((__always_inline__))
22465 vst1_s8 (int8_t *a, int8x8_t b)
22467 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
22470 __extension__ static __inline void __attribute__ ((__always_inline__))
22471 vst1_s16 (int16_t *a, int16x4_t b)
22473 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
22476 __extension__ static __inline void __attribute__ ((__always_inline__))
22477 vst1_s32 (int32_t *a, int32x2_t b)
22479 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
22482 __extension__ static __inline void __attribute__ ((__always_inline__))
22483 vst1_s64 (int64_t *a, int64x1_t b)
22485 *a = b[0];
22488 __extension__ static __inline void __attribute__ ((__always_inline__))
22489 vst1_u8 (uint8_t *a, uint8x8_t b)
22491 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22492 (int8x8_t) b);
22495 __extension__ static __inline void __attribute__ ((__always_inline__))
22496 vst1_u16 (uint16_t *a, uint16x4_t b)
22498 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22499 (int16x4_t) b);
22502 __extension__ static __inline void __attribute__ ((__always_inline__))
22503 vst1_u32 (uint32_t *a, uint32x2_t b)
22505 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
22506 (int32x2_t) b);
22509 __extension__ static __inline void __attribute__ ((__always_inline__))
22510 vst1_u64 (uint64_t *a, uint64x1_t b)
22512 *a = b[0];
22515 __extension__ static __inline void __attribute__ ((__always_inline__))
22516 vst1q_f32 (float32_t *a, float32x4_t b)
22518 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
22521 __extension__ static __inline void __attribute__ ((__always_inline__))
22522 vst1q_f64 (float64_t *a, float64x2_t b)
22524 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
22527 /* vst1q */
22529 __extension__ static __inline void __attribute__ ((__always_inline__))
22530 vst1q_p8 (poly8_t *a, poly8x16_t b)
22532 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22533 (int8x16_t) b);
22536 __extension__ static __inline void __attribute__ ((__always_inline__))
22537 vst1q_p16 (poly16_t *a, poly16x8_t b)
22539 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22540 (int16x8_t) b);
22543 __extension__ static __inline void __attribute__ ((__always_inline__))
22544 vst1q_s8 (int8_t *a, int8x16_t b)
22546 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
22549 __extension__ static __inline void __attribute__ ((__always_inline__))
22550 vst1q_s16 (int16_t *a, int16x8_t b)
22552 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
22555 __extension__ static __inline void __attribute__ ((__always_inline__))
22556 vst1q_s32 (int32_t *a, int32x4_t b)
22558 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
22561 __extension__ static __inline void __attribute__ ((__always_inline__))
22562 vst1q_s64 (int64_t *a, int64x2_t b)
22564 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
22567 __extension__ static __inline void __attribute__ ((__always_inline__))
22568 vst1q_u8 (uint8_t *a, uint8x16_t b)
22570 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22571 (int8x16_t) b);
22574 __extension__ static __inline void __attribute__ ((__always_inline__))
22575 vst1q_u16 (uint16_t *a, uint16x8_t b)
22577 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22578 (int16x8_t) b);
22581 __extension__ static __inline void __attribute__ ((__always_inline__))
22582 vst1q_u32 (uint32_t *a, uint32x4_t b)
22584 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
22585 (int32x4_t) b);
22588 __extension__ static __inline void __attribute__ ((__always_inline__))
22589 vst1q_u64 (uint64_t *a, uint64x2_t b)
22591 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
22592 (int64x2_t) b);
22595 /* vstn */
22597 __extension__ static __inline void
22598 vst2_s64 (int64_t * __a, int64x1x2_t val)
22600 __builtin_aarch64_simd_oi __o;
22601 int64x2x2_t temp;
22602 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22603 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22604 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22605 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22606 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22609 __extension__ static __inline void
22610 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
22612 __builtin_aarch64_simd_oi __o;
22613 uint64x2x2_t temp;
22614 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22615 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22616 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22617 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22618 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22621 __extension__ static __inline void
22622 vst2_f64 (float64_t * __a, float64x1x2_t val)
22624 __builtin_aarch64_simd_oi __o;
22625 float64x2x2_t temp;
22626 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22627 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22628 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
22629 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
22630 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
22633 __extension__ static __inline void
22634 vst2_s8 (int8_t * __a, int8x8x2_t val)
22636 __builtin_aarch64_simd_oi __o;
22637 int8x16x2_t temp;
22638 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22639 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22640 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22641 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22642 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22645 __extension__ static __inline void __attribute__ ((__always_inline__))
22646 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
22648 __builtin_aarch64_simd_oi __o;
22649 poly8x16x2_t temp;
22650 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22651 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22652 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22653 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22654 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22657 __extension__ static __inline void __attribute__ ((__always_inline__))
22658 vst2_s16 (int16_t * __a, int16x4x2_t val)
22660 __builtin_aarch64_simd_oi __o;
22661 int16x8x2_t temp;
22662 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22663 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22664 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22665 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22666 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22669 __extension__ static __inline void __attribute__ ((__always_inline__))
22670 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
22672 __builtin_aarch64_simd_oi __o;
22673 poly16x8x2_t temp;
22674 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22675 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
22676 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22677 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22678 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22681 __extension__ static __inline void __attribute__ ((__always_inline__))
22682 vst2_s32 (int32_t * __a, int32x2x2_t val)
22684 __builtin_aarch64_simd_oi __o;
22685 int32x4x2_t temp;
22686 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
22687 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
22688 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22689 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22690 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22693 __extension__ static __inline void __attribute__ ((__always_inline__))
22694 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
22696 __builtin_aarch64_simd_oi __o;
22697 uint8x16x2_t temp;
22698 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
22699 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
22700 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22701 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22702 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22705 __extension__ static __inline void __attribute__ ((__always_inline__))
22706 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
22708 __builtin_aarch64_simd_oi __o;
22709 uint16x8x2_t temp;
22710 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
22711 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
22712 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22713 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22714 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22717 __extension__ static __inline void __attribute__ ((__always_inline__))
22718 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
22720 __builtin_aarch64_simd_oi __o;
22721 uint32x4x2_t temp;
22722 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
22723 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
22724 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22725 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22726 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22729 __extension__ static __inline void __attribute__ ((__always_inline__))
22730 vst2_f32 (float32_t * __a, float32x2x2_t val)
22732 __builtin_aarch64_simd_oi __o;
22733 float32x4x2_t temp;
22734 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
22735 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
22736 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
22737 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
22738 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
22741 __extension__ static __inline void __attribute__ ((__always_inline__))
22742 vst2q_s8 (int8_t * __a, int8x16x2_t val)
22744 __builtin_aarch64_simd_oi __o;
22745 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22746 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22747 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22750 __extension__ static __inline void __attribute__ ((__always_inline__))
22751 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
22753 __builtin_aarch64_simd_oi __o;
22754 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22755 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22756 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22759 __extension__ static __inline void __attribute__ ((__always_inline__))
22760 vst2q_s16 (int16_t * __a, int16x8x2_t val)
22762 __builtin_aarch64_simd_oi __o;
22763 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22764 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22765 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22768 __extension__ static __inline void __attribute__ ((__always_inline__))
22769 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
22771 __builtin_aarch64_simd_oi __o;
22772 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22773 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22774 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22777 __extension__ static __inline void __attribute__ ((__always_inline__))
22778 vst2q_s32 (int32_t * __a, int32x4x2_t val)
22780 __builtin_aarch64_simd_oi __o;
22781 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22782 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22783 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22786 __extension__ static __inline void __attribute__ ((__always_inline__))
22787 vst2q_s64 (int64_t * __a, int64x2x2_t val)
22789 __builtin_aarch64_simd_oi __o;
22790 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22791 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22792 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22795 __extension__ static __inline void __attribute__ ((__always_inline__))
22796 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
22798 __builtin_aarch64_simd_oi __o;
22799 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22800 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22801 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22804 __extension__ static __inline void __attribute__ ((__always_inline__))
22805 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
22807 __builtin_aarch64_simd_oi __o;
22808 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22809 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22810 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22813 __extension__ static __inline void __attribute__ ((__always_inline__))
22814 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
22816 __builtin_aarch64_simd_oi __o;
22817 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22818 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22819 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22822 __extension__ static __inline void __attribute__ ((__always_inline__))
22823 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
22825 __builtin_aarch64_simd_oi __o;
22826 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22827 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22828 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22831 __extension__ static __inline void __attribute__ ((__always_inline__))
22832 vst2q_f32 (float32_t * __a, float32x4x2_t val)
22834 __builtin_aarch64_simd_oi __o;
22835 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
22836 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
22837 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
22840 __extension__ static __inline void __attribute__ ((__always_inline__))
22841 vst2q_f64 (float64_t * __a, float64x2x2_t val)
22843 __builtin_aarch64_simd_oi __o;
22844 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
22845 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
22846 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
22849 __extension__ static __inline void
22850 vst3_s64 (int64_t * __a, int64x1x3_t val)
22852 __builtin_aarch64_simd_ci __o;
22853 int64x2x3_t temp;
22854 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22855 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22856 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
22857 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22858 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22859 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22860 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22863 __extension__ static __inline void
22864 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
22866 __builtin_aarch64_simd_ci __o;
22867 uint64x2x3_t temp;
22868 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22869 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22870 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
22871 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22872 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22873 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22874 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22877 __extension__ static __inline void
22878 vst3_f64 (float64_t * __a, float64x1x3_t val)
22880 __builtin_aarch64_simd_ci __o;
22881 float64x2x3_t temp;
22882 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22883 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22884 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
22885 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
22886 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
22887 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
22888 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
22891 __extension__ static __inline void
22892 vst3_s8 (int8_t * __a, int8x8x3_t val)
22894 __builtin_aarch64_simd_ci __o;
22895 int8x16x3_t temp;
22896 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22897 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22898 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
22899 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22900 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22901 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22902 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22905 __extension__ static __inline void __attribute__ ((__always_inline__))
22906 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
22908 __builtin_aarch64_simd_ci __o;
22909 poly8x16x3_t temp;
22910 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22911 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22912 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
22913 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22914 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22915 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22916 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22919 __extension__ static __inline void __attribute__ ((__always_inline__))
22920 vst3_s16 (int16_t * __a, int16x4x3_t val)
22922 __builtin_aarch64_simd_ci __o;
22923 int16x8x3_t temp;
22924 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22925 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22926 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
22927 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22928 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22929 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22930 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22933 __extension__ static __inline void __attribute__ ((__always_inline__))
22934 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
22936 __builtin_aarch64_simd_ci __o;
22937 poly16x8x3_t temp;
22938 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22939 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
22940 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
22941 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22942 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22943 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22944 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22947 __extension__ static __inline void __attribute__ ((__always_inline__))
22948 vst3_s32 (int32_t * __a, int32x2x3_t val)
22950 __builtin_aarch64_simd_ci __o;
22951 int32x4x3_t temp;
22952 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
22953 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
22954 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
22955 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
22956 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
22957 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
22958 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
22961 __extension__ static __inline void __attribute__ ((__always_inline__))
22962 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
22964 __builtin_aarch64_simd_ci __o;
22965 uint8x16x3_t temp;
22966 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
22967 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
22968 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
22969 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22970 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22971 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22972 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22975 __extension__ static __inline void __attribute__ ((__always_inline__))
22976 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
22978 __builtin_aarch64_simd_ci __o;
22979 uint16x8x3_t temp;
22980 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
22981 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
22982 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
22983 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22984 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22985 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22986 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22989 __extension__ static __inline void __attribute__ ((__always_inline__))
22990 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
22992 __builtin_aarch64_simd_ci __o;
22993 uint32x4x3_t temp;
22994 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
22995 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
22996 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
22997 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
22998 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
22999 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23000 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23003 __extension__ static __inline void __attribute__ ((__always_inline__))
23004 vst3_f32 (float32_t * __a, float32x2x3_t val)
23006 __builtin_aarch64_simd_ci __o;
23007 float32x4x3_t temp;
23008 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23009 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23010 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23011 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
23012 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
23013 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23014 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23017 __extension__ static __inline void __attribute__ ((__always_inline__))
23018 vst3q_s8 (int8_t * __a, int8x16x3_t val)
23020 __builtin_aarch64_simd_ci __o;
23021 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23022 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23023 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23024 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23027 __extension__ static __inline void __attribute__ ((__always_inline__))
23028 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
23030 __builtin_aarch64_simd_ci __o;
23031 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23032 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23033 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23034 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23037 __extension__ static __inline void __attribute__ ((__always_inline__))
23038 vst3q_s16 (int16_t * __a, int16x8x3_t val)
23040 __builtin_aarch64_simd_ci __o;
23041 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23042 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23043 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23044 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23047 __extension__ static __inline void __attribute__ ((__always_inline__))
23048 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
23050 __builtin_aarch64_simd_ci __o;
23051 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23052 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23053 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23054 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23057 __extension__ static __inline void __attribute__ ((__always_inline__))
23058 vst3q_s32 (int32_t * __a, int32x4x3_t val)
23060 __builtin_aarch64_simd_ci __o;
23061 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23062 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23063 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23064 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23067 __extension__ static __inline void __attribute__ ((__always_inline__))
23068 vst3q_s64 (int64_t * __a, int64x2x3_t val)
23070 __builtin_aarch64_simd_ci __o;
23071 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23072 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23073 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23074 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23077 __extension__ static __inline void __attribute__ ((__always_inline__))
23078 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
23080 __builtin_aarch64_simd_ci __o;
23081 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23082 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23083 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23084 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23087 __extension__ static __inline void __attribute__ ((__always_inline__))
23088 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
23090 __builtin_aarch64_simd_ci __o;
23091 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23092 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23093 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23094 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23097 __extension__ static __inline void __attribute__ ((__always_inline__))
23098 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
23100 __builtin_aarch64_simd_ci __o;
23101 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23102 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23103 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23104 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23107 __extension__ static __inline void __attribute__ ((__always_inline__))
23108 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
23110 __builtin_aarch64_simd_ci __o;
23111 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23112 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23113 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23114 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23117 __extension__ static __inline void __attribute__ ((__always_inline__))
23118 vst3q_f32 (float32_t * __a, float32x4x3_t val)
23120 __builtin_aarch64_simd_ci __o;
23121 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
23122 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
23123 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
23124 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23127 __extension__ static __inline void __attribute__ ((__always_inline__))
23128 vst3q_f64 (float64_t * __a, float64x2x3_t val)
23130 __builtin_aarch64_simd_ci __o;
23131 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
23132 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
23133 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
23134 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
23137 __extension__ static __inline void
23138 vst4_s64 (int64_t * __a, int64x1x4_t val)
23140 __builtin_aarch64_simd_xi __o;
23141 int64x2x4_t temp;
23142 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23143 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23144 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23145 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23146 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23147 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23148 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23149 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23150 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23153 __extension__ static __inline void
23154 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23156 __builtin_aarch64_simd_xi __o;
23157 uint64x2x4_t temp;
23158 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23159 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23160 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23161 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23162 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23163 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23164 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23165 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23166 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23169 __extension__ static __inline void
23170 vst4_f64 (float64_t * __a, float64x1x4_t val)
23172 __builtin_aarch64_simd_xi __o;
23173 float64x2x4_t temp;
23174 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23175 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23176 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23177 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23178 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23179 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23180 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23181 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23182 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
23185 __extension__ static __inline void
23186 vst4_s8 (int8_t * __a, int8x8x4_t val)
23188 __builtin_aarch64_simd_xi __o;
23189 int8x16x4_t temp;
23190 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23191 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23192 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23193 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23194 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23195 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23196 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23197 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23198 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23201 __extension__ static __inline void __attribute__ ((__always_inline__))
23202 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
23204 __builtin_aarch64_simd_xi __o;
23205 poly8x16x4_t temp;
23206 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23207 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23208 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23209 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
23210 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23211 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23212 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23213 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23214 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23217 __extension__ static __inline void __attribute__ ((__always_inline__))
23218 vst4_s16 (int16_t * __a, int16x4x4_t val)
23220 __builtin_aarch64_simd_xi __o;
23221 int16x8x4_t temp;
23222 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23223 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23224 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23225 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
23226 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23227 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23228 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23229 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23230 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23233 __extension__ static __inline void __attribute__ ((__always_inline__))
23234 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
23236 __builtin_aarch64_simd_xi __o;
23237 poly16x8x4_t temp;
23238 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23239 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23240 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23241 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
23242 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23243 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23244 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23245 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23246 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23249 __extension__ static __inline void __attribute__ ((__always_inline__))
23250 vst4_s32 (int32_t * __a, int32x2x4_t val)
23252 __builtin_aarch64_simd_xi __o;
23253 int32x4x4_t temp;
23254 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23255 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23256 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23257 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
23258 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23259 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23260 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23261 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23262 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23265 __extension__ static __inline void __attribute__ ((__always_inline__))
23266 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
23268 __builtin_aarch64_simd_xi __o;
23269 uint8x16x4_t temp;
23270 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23271 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23272 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23273 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
23274 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23275 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23276 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23277 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23278 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23281 __extension__ static __inline void __attribute__ ((__always_inline__))
23282 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
23284 __builtin_aarch64_simd_xi __o;
23285 uint16x8x4_t temp;
23286 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23287 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23288 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23289 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
23290 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23291 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23292 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23293 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23294 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23297 __extension__ static __inline void __attribute__ ((__always_inline__))
23298 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
23300 __builtin_aarch64_simd_xi __o;
23301 uint32x4x4_t temp;
23302 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23303 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23304 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23305 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
23306 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23307 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23308 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23309 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23310 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23313 __extension__ static __inline void __attribute__ ((__always_inline__))
23314 vst4_f32 (float32_t * __a, float32x2x4_t val)
23316 __builtin_aarch64_simd_xi __o;
23317 float32x4x4_t temp;
23318 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23319 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23320 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23321 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
23322 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
23323 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
23324 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
23325 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
23326 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23329 __extension__ static __inline void __attribute__ ((__always_inline__))
23330 vst4q_s8 (int8_t * __a, int8x16x4_t val)
23332 __builtin_aarch64_simd_xi __o;
23333 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23334 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23335 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23336 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23337 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23340 __extension__ static __inline void __attribute__ ((__always_inline__))
23341 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
23343 __builtin_aarch64_simd_xi __o;
23344 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23345 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23346 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23347 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23348 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23351 __extension__ static __inline void __attribute__ ((__always_inline__))
23352 vst4q_s16 (int16_t * __a, int16x8x4_t val)
23354 __builtin_aarch64_simd_xi __o;
23355 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23356 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23357 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23358 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23359 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23362 __extension__ static __inline void __attribute__ ((__always_inline__))
23363 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
23365 __builtin_aarch64_simd_xi __o;
23366 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23367 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23368 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23369 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23370 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23373 __extension__ static __inline void __attribute__ ((__always_inline__))
23374 vst4q_s32 (int32_t * __a, int32x4x4_t val)
23376 __builtin_aarch64_simd_xi __o;
23377 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23378 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23379 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23380 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23381 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
23384 __extension__ static __inline void __attribute__ ((__always_inline__))
23385 vst4q_s64 (int64_t * __a, int64x2x4_t val)
23387 __builtin_aarch64_simd_xi __o;
23388 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23389 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23390 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23391 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23392 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
23395 __extension__ static __inline void __attribute__ ((__always_inline__))
23396 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
23398 __builtin_aarch64_simd_xi __o;
23399 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23400 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23401 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23402 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23403 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23406 __extension__ static __inline void __attribute__ ((__always_inline__))
23407 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
23409 __builtin_aarch64_simd_xi __o;
23410 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23411 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23412 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23413 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23414 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23417 __extension__ static __inline void __attribute__ ((__always_inline__))
23418 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
23420 __builtin_aarch64_simd_xi __o;
23421 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23422 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23423 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23424 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23425 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
23428 __extension__ static __inline void __attribute__ ((__always_inline__))
23429 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
23431 __builtin_aarch64_simd_xi __o;
23432 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23433 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23434 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23435 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23436 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
23439 __extension__ static __inline void __attribute__ ((__always_inline__))
23440 vst4q_f32 (float32_t * __a, float32x4x4_t val)
23442 __builtin_aarch64_simd_xi __o;
23443 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
23444 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
23445 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
23446 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
23447 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23450 __extension__ static __inline void __attribute__ ((__always_inline__))
23451 vst4q_f64 (float64_t * __a, float64x2x4_t val)
23453 __builtin_aarch64_simd_xi __o;
23454 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
23455 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
23456 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
23457 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
23458 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
/* vsub */

/* Scalar subtract of two 64-bit values.  The unsigned form wraps
   modulo 2^64 as usual for unsigned arithmetic.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsubd_s64 (int64_t __a, int64_t __b)
{
  int64_t __diff = __a - __b;
  return __diff;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64_t __a, uint64_t __b)
{
  uint64_t __diff = __a - __b;
  return __diff;
}
23475 /* vtbx1 */
23477 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23478 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
23480 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
23481 vmov_n_u8 (8));
23482 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
23484 return vbsl_s8 (__mask, __tbl, __r);
23487 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23488 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
23490 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
23491 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
23493 return vbsl_u8 (__mask, __tbl, __r);
23496 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23497 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
23499 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
23500 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
23502 return vbsl_p8 (__mask, __tbl, __r);
23505 /* vtbx3 */
23507 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23508 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
23510 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
23511 vmov_n_u8 (24));
23512 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
23514 return vbsl_s8 (__mask, __tbl, __r);
23517 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23518 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
23520 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
23521 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
23523 return vbsl_u8 (__mask, __tbl, __r);
23526 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23527 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
23529 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
23530 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
23532 return vbsl_p8 (__mask, __tbl, __r);
23535 /* vtrn */
23537 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23538 vtrn1_f32 (float32x2_t __a, float32x2_t __b)
23540 #ifdef __AARCH64EB__
23541 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23542 #else
23543 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23544 #endif
23547 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23548 vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
23550 #ifdef __AARCH64EB__
23551 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23552 #else
23553 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23554 #endif
23557 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
23558 vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
23560 #ifdef __AARCH64EB__
23561 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23562 #else
23563 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23564 #endif
23567 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23568 vtrn1_s8 (int8x8_t __a, int8x8_t __b)
23570 #ifdef __AARCH64EB__
23571 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23572 #else
23573 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23574 #endif
23577 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23578 vtrn1_s16 (int16x4_t __a, int16x4_t __b)
23580 #ifdef __AARCH64EB__
23581 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23582 #else
23583 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23584 #endif
23587 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23588 vtrn1_s32 (int32x2_t __a, int32x2_t __b)
23590 #ifdef __AARCH64EB__
23591 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23592 #else
23593 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23594 #endif
23597 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23598 vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
23600 #ifdef __AARCH64EB__
23601 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23602 #else
23603 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23604 #endif
23607 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23608 vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
23610 #ifdef __AARCH64EB__
23611 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23612 #else
23613 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23614 #endif
23617 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23618 vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
23620 #ifdef __AARCH64EB__
23621 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23622 #else
23623 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23624 #endif
23627 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23628 vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
23630 #ifdef __AARCH64EB__
23631 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23632 #else
23633 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23634 #endif
23637 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23638 vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
23640 #ifdef __AARCH64EB__
23641 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23642 #else
23643 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23644 #endif
23647 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
23648 vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
23650 #ifdef __AARCH64EB__
23651 return __builtin_shuffle (__a, __b,
23652 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23653 #else
23654 return __builtin_shuffle (__a, __b,
23655 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23656 #endif
23659 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
23660 vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
23662 #ifdef __AARCH64EB__
23663 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23664 #else
23665 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23666 #endif
23669 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23670 vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
23672 #ifdef __AARCH64EB__
23673 return __builtin_shuffle (__a, __b,
23674 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23675 #else
23676 return __builtin_shuffle (__a, __b,
23677 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23678 #endif
23681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23682 vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
23684 #ifdef __AARCH64EB__
23685 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23686 #else
23687 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23688 #endif
23691 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23692 vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
23694 #ifdef __AARCH64EB__
23695 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23696 #else
23697 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23698 #endif
23701 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23702 vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
23704 #ifdef __AARCH64EB__
23705 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23706 #else
23707 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23708 #endif
23711 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23712 vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
23714 #ifdef __AARCH64EB__
23715 return __builtin_shuffle (__a, __b,
23716 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23717 #else
23718 return __builtin_shuffle (__a, __b,
23719 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23720 #endif
23723 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23724 vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
23726 #ifdef __AARCH64EB__
23727 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23728 #else
23729 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23730 #endif
23733 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23734 vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
23736 #ifdef __AARCH64EB__
23737 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23738 #else
23739 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23740 #endif
23743 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23744 vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
23746 #ifdef __AARCH64EB__
23747 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23748 #else
23749 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23750 #endif
23753 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23754 vtrn2_f32 (float32x2_t __a, float32x2_t __b)
23756 #ifdef __AARCH64EB__
23757 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
23758 #else
23759 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
23760 #endif
23763 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23764 vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
23766 #ifdef __AARCH64EB__
23767 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
23768 #else
23769 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
23770 #endif
23773 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
23774 vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
23776 #ifdef __AARCH64EB__
23777 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
23778 #else
23779 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
23780 #endif
23783 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23784 vtrn2_s8 (int8x8_t __a, int8x8_t __b)
23786 #ifdef __AARCH64EB__
23787 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
23788 #else
23789 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
23790 #endif
23793 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23794 vtrn2_s16 (int16x4_t __a, int16x4_t __b)
23796 #ifdef __AARCH64EB__
23797 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
23798 #else
23799 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
23800 #endif
23803 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23804 vtrn2_s32 (int32x2_t __a, int32x2_t __b)
23806 #ifdef __AARCH64EB__
23807 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
23808 #else
23809 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
23810 #endif
23813 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23814 vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
23816 #ifdef __AARCH64EB__
23817 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
23818 #else
23819 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
23820 #endif
23823 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23824 vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
23826 #ifdef __AARCH64EB__
23827 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
23828 #else
23829 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
23830 #endif
23833 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23834 vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
23836 #ifdef __AARCH64EB__
23837 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
23838 #else
23839 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
23840 #endif
23843 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23844 vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
23846 #ifdef __AARCH64EB__
23847 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
23848 #else
23849 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
23850 #endif
23853 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23854 vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
23856 #ifdef __AARCH64EB__
23857 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
23858 #else
23859 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
23860 #endif
23863 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
23864 vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
23866 #ifdef __AARCH64EB__
23867 return __builtin_shuffle (__a, __b,
23868 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
23869 #else
23870 return __builtin_shuffle (__a, __b,
23871 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
23872 #endif
23875 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
23876 vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
23878 #ifdef __AARCH64EB__
23879 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
23880 #else
23881 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
23882 #endif
23885 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23886 vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
23888 #ifdef __AARCH64EB__
23889 return __builtin_shuffle (__a, __b,
23890 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
23891 #else
23892 return __builtin_shuffle (__a, __b,
23893 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
23894 #endif
23897 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23898 vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
23900 #ifdef __AARCH64EB__
23901 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
23902 #else
23903 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
23904 #endif
23907 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23908 vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
23910 #ifdef __AARCH64EB__
23911 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
23912 #else
23913 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
23914 #endif
23917 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23918 vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
23920 #ifdef __AARCH64EB__
23921 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
23922 #else
23923 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
23924 #endif
23927 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23928 vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
23930 #ifdef __AARCH64EB__
23931 return __builtin_shuffle (__a, __b,
23932 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
23933 #else
23934 return __builtin_shuffle (__a, __b,
23935 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
23936 #endif
23939 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23940 vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
23942 #ifdef __AARCH64EB__
23943 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
23944 #else
23945 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
23946 #endif
23949 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23950 vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
23952 #ifdef __AARCH64EB__
23953 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
23954 #else
23955 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
23956 #endif
23959 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23960 vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
23962 #ifdef __AARCH64EB__
23963 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
23964 #else
23965 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
23966 #endif
23969 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
23970 vtrn_f32 (float32x2_t a, float32x2_t b)
23972 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
23975 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
23976 vtrn_p8 (poly8x8_t a, poly8x8_t b)
23978 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
23981 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
23982 vtrn_p16 (poly16x4_t a, poly16x4_t b)
23984 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
23987 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
23988 vtrn_s8 (int8x8_t a, int8x8_t b)
23990 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
23993 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
23994 vtrn_s16 (int16x4_t a, int16x4_t b)
23996 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
23999 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
24000 vtrn_s32 (int32x2_t a, int32x2_t b)
24002 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
24005 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
24006 vtrn_u8 (uint8x8_t a, uint8x8_t b)
24008 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
24011 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
24012 vtrn_u16 (uint16x4_t a, uint16x4_t b)
24014 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
24017 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
24018 vtrn_u32 (uint32x2_t a, uint32x2_t b)
24020 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
24023 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
24024 vtrnq_f32 (float32x4_t a, float32x4_t b)
24026 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
24029 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
24030 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
24032 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
24035 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
24036 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
24038 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
24041 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
24042 vtrnq_s8 (int8x16_t a, int8x16_t b)
24044 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
24047 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
24048 vtrnq_s16 (int16x8_t a, int16x8_t b)
24050 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
24053 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
24054 vtrnq_s32 (int32x4_t a, int32x4_t b)
24056 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
24059 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
24060 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
24062 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
24065 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
24066 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
24068 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
24071 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
24072 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
24074 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
24077 /* vtst */
24079 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24080 vtst_s8 (int8x8_t __a, int8x8_t __b)
24082 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
24085 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24086 vtst_s16 (int16x4_t __a, int16x4_t __b)
24088 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
24091 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24092 vtst_s32 (int32x2_t __a, int32x2_t __b)
24094 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
24097 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24098 vtst_s64 (int64x1_t __a, int64x1_t __b)
24100 return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
24103 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24104 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
24106 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
24107 (int8x8_t) __b);
24110 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24111 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
24113 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
24114 (int16x4_t) __b);
24117 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24118 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
24120 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
24121 (int32x2_t) __b);
24124 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24125 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
24127 return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
24130 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24131 vtstq_s8 (int8x16_t __a, int8x16_t __b)
24133 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
24136 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24137 vtstq_s16 (int16x8_t __a, int16x8_t __b)
24139 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
24142 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24143 vtstq_s32 (int32x4_t __a, int32x4_t __b)
24145 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
24148 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24149 vtstq_s64 (int64x2_t __a, int64x2_t __b)
24151 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
24154 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24155 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
24157 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
24158 (int8x16_t) __b);
24161 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24162 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
24164 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
24165 (int16x8_t) __b);
24168 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24169 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
24171 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
24172 (int32x4_t) __b);
24175 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24176 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
24178 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
24179 (int64x2_t) __b);
24182 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24183 vtstd_s64 (int64_t __a, int64_t __b)
24185 return (__a & __b) ? -1ll : 0ll;
24188 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24189 vtstd_u64 (uint64_t __a, uint64_t __b)
24191 return (__a & __b) ? -1ll : 0ll;
24194 /* vuqadd */
24196 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24197 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
24199 return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
24202 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24203 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
24205 return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
24208 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24209 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
24211 return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
24214 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24215 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
24217 return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
24220 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24221 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
24223 return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
24226 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24227 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
24229 return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
24232 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24233 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
24235 return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
24238 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24239 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
24241 return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
24244 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
24245 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
24247 return __builtin_aarch64_suqaddqi_ssu (__a, __b);
24250 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
24251 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
24253 return __builtin_aarch64_suqaddhi_ssu (__a, __b);
24256 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
24257 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
24259 return __builtin_aarch64_suqaddsi_ssu (__a, __b);
24262 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
24263 vuqaddd_s64 (int64_t __a, uint64_t __b)
24265 return __builtin_aarch64_suqadddi_ssu (__a, __b);
/* __DEFINTERLEAVE: expand to an inline v<op>[q]_<suffix> wrapper that
   returns both halves of an interleave op as a x2 structure, built
   from the corresponding v<op>1 / v<op>2 intrinsics.
   __INTERLEAVE_LIST: instantiate that wrapper for every 64-bit and
   128-bit element type.  */

#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)		\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)			\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }

#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
24297 /* vuzp */
24299 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24300 vuzp1_f32 (float32x2_t __a, float32x2_t __b)
24302 #ifdef __AARCH64EB__
24303 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24304 #else
24305 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24306 #endif
24309 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24310 vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
24312 #ifdef __AARCH64EB__
24313 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24314 #else
24315 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24316 #endif
24319 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24320 vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
24322 #ifdef __AARCH64EB__
24323 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24324 #else
24325 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24326 #endif
24329 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24330 vuzp1_s8 (int8x8_t __a, int8x8_t __b)
24332 #ifdef __AARCH64EB__
24333 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24334 #else
24335 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24336 #endif
24339 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24340 vuzp1_s16 (int16x4_t __a, int16x4_t __b)
24342 #ifdef __AARCH64EB__
24343 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24344 #else
24345 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24346 #endif
24349 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24350 vuzp1_s32 (int32x2_t __a, int32x2_t __b)
24352 #ifdef __AARCH64EB__
24353 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24354 #else
24355 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24356 #endif
24359 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24360 vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
24362 #ifdef __AARCH64EB__
24363 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24364 #else
24365 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24366 #endif
24369 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24370 vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
24372 #ifdef __AARCH64EB__
24373 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24374 #else
24375 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24376 #endif
24379 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24380 vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
24382 #ifdef __AARCH64EB__
24383 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24384 #else
24385 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24386 #endif
24389 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24390 vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
24392 #ifdef __AARCH64EB__
24393 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
24394 #else
24395 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
24396 #endif
24399 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24400 vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
24402 #ifdef __AARCH64EB__
24403 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24404 #else
24405 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24406 #endif
24409 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24410 vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
24412 #ifdef __AARCH64EB__
24413 return __builtin_shuffle (__a, __b, (uint8x16_t)
24414 {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
24415 #else
24416 return __builtin_shuffle (__a, __b, (uint8x16_t)
24417 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
24418 #endif
24421 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24422 vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
24424 #ifdef __AARCH64EB__
24425 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24426 #else
24427 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24428 #endif
24431 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24432 vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
24434 #ifdef __AARCH64EB__
24435 return __builtin_shuffle (__a, __b,
24436 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
24437 #else
24438 return __builtin_shuffle (__a, __b,
24439 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
24440 #endif
24443 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24444 vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
24446 #ifdef __AARCH64EB__
24447 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24448 #else
24449 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24450 #endif
24453 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24454 vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
24456 #ifdef __AARCH64EB__
24457 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
24458 #else
24459 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
24460 #endif
24463 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24464 vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
24466 #ifdef __AARCH64EB__
24467 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24468 #else
24469 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24470 #endif
24473 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24474 vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
24476 #ifdef __AARCH64EB__
24477 return __builtin_shuffle (__a, __b,
24478 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
24479 #else
24480 return __builtin_shuffle (__a, __b,
24481 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
24482 #endif
24485 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24486 vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
24488 #ifdef __AARCH64EB__
24489 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24490 #else
24491 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24492 #endif
24495 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24496 vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
24498 #ifdef __AARCH64EB__
24499 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
24500 #else
24501 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
24502 #endif
24505 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24506 vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
24508 #ifdef __AARCH64EB__
24509 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24510 #else
24511 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24512 #endif
24515 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24516 vuzp2_f32 (float32x2_t __a, float32x2_t __b)
24518 #ifdef __AARCH64EB__
24519 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24520 #else
24521 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24522 #endif
24525 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24526 vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
24528 #ifdef __AARCH64EB__
24529 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24530 #else
24531 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24532 #endif
24535 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24536 vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
24538 #ifdef __AARCH64EB__
24539 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
24540 #else
24541 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
24542 #endif
24545 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24546 vuzp2_s8 (int8x8_t __a, int8x8_t __b)
24548 #ifdef __AARCH64EB__
24549 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24550 #else
24551 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24552 #endif
24555 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24556 vuzp2_s16 (int16x4_t __a, int16x4_t __b)
24558 #ifdef __AARCH64EB__
24559 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
24560 #else
24561 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
24562 #endif
24565 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24566 vuzp2_s32 (int32x2_t __a, int32x2_t __b)
24568 #ifdef __AARCH64EB__
24569 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24570 #else
24571 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24572 #endif
24575 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24576 vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
24578 #ifdef __AARCH64EB__
24579 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24580 #else
24581 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24582 #endif
24585 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24586 vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
24588 #ifdef __AARCH64EB__
24589 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
24590 #else
24591 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
24592 #endif
24595 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24596 vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
24598 #ifdef __AARCH64EB__
24599 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24600 #else
24601 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24602 #endif
24605 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24606 vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
24608 #ifdef __AARCH64EB__
24609 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24610 #else
24611 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24612 #endif
24615 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24616 vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
24618 #ifdef __AARCH64EB__
24619 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24620 #else
24621 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24622 #endif
24625 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24626 vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
24628 #ifdef __AARCH64EB__
24629 return __builtin_shuffle (__a, __b,
24630 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24631 #else
24632 return __builtin_shuffle (__a, __b,
24633 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24634 #endif
24637 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24638 vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
24640 #ifdef __AARCH64EB__
24641 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24642 #else
24643 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24644 #endif
24647 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24648 vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
24650 #ifdef __AARCH64EB__
24651 return __builtin_shuffle (__a, __b,
24652 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24653 #else
24654 return __builtin_shuffle (__a, __b,
24655 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24656 #endif
24659 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24660 vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
24662 #ifdef __AARCH64EB__
24663 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24664 #else
24665 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24666 #endif
24669 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24670 vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
24672 #ifdef __AARCH64EB__
24673 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24674 #else
24675 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24676 #endif
24679 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24680 vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
24682 #ifdef __AARCH64EB__
24683 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24684 #else
24685 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24686 #endif
24689 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24690 vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
24692 #ifdef __AARCH64EB__
24693 return __builtin_shuffle (__a, __b, (uint8x16_t)
24694 {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24695 #else
24696 return __builtin_shuffle (__a, __b, (uint8x16_t)
24697 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24698 #endif
24701 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24702 vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
24704 #ifdef __AARCH64EB__
24705 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24706 #else
24707 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24708 #endif
24711 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24712 vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
24714 #ifdef __AARCH64EB__
24715 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24716 #else
24717 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24718 #endif
24721 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24722 vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
24724 #ifdef __AARCH64EB__
24725 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24726 #else
24727 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24728 #endif
24731 __INTERLEAVE_LIST (uzp)
24733 /* vzip */
24735 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24736 vzip1_f32 (float32x2_t __a, float32x2_t __b)
24738 #ifdef __AARCH64EB__
24739 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24740 #else
24741 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24742 #endif
24745 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24746 vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
24748 #ifdef __AARCH64EB__
24749 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24750 #else
24751 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24752 #endif
24755 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24756 vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
24758 #ifdef __AARCH64EB__
24759 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24760 #else
24761 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24762 #endif
24765 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24766 vzip1_s8 (int8x8_t __a, int8x8_t __b)
24768 #ifdef __AARCH64EB__
24769 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24770 #else
24771 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24772 #endif
24775 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24776 vzip1_s16 (int16x4_t __a, int16x4_t __b)
24778 #ifdef __AARCH64EB__
24779 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24780 #else
24781 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24782 #endif
24785 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24786 vzip1_s32 (int32x2_t __a, int32x2_t __b)
24788 #ifdef __AARCH64EB__
24789 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24790 #else
24791 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24792 #endif
24795 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24796 vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
24798 #ifdef __AARCH64EB__
24799 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24800 #else
24801 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24802 #endif
24805 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24806 vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
24808 #ifdef __AARCH64EB__
24809 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24810 #else
24811 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24812 #endif
24815 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24816 vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
24818 #ifdef __AARCH64EB__
24819 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24820 #else
24821 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24822 #endif
24825 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24826 vzip1q_f32 (float32x4_t __a, float32x4_t __b)
24828 #ifdef __AARCH64EB__
24829 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24830 #else
24831 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
24832 #endif
24835 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24836 vzip1q_f64 (float64x2_t __a, float64x2_t __b)
24838 #ifdef __AARCH64EB__
24839 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24840 #else
24841 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24842 #endif
24845 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24846 vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
24848 #ifdef __AARCH64EB__
24849 return __builtin_shuffle (__a, __b, (uint8x16_t)
24850 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24851 #else
24852 return __builtin_shuffle (__a, __b, (uint8x16_t)
24853 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24854 #endif
24857 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24858 vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
24860 #ifdef __AARCH64EB__
24861 return __builtin_shuffle (__a, __b, (uint16x8_t)
24862 {12, 4, 13, 5, 14, 6, 15, 7});
24863 #else
24864 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24865 #endif
24868 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24869 vzip1q_s8 (int8x16_t __a, int8x16_t __b)
24871 #ifdef __AARCH64EB__
24872 return __builtin_shuffle (__a, __b, (uint8x16_t)
24873 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24874 #else
24875 return __builtin_shuffle (__a, __b, (uint8x16_t)
24876 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24877 #endif
24880 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24881 vzip1q_s16 (int16x8_t __a, int16x8_t __b)
24883 #ifdef __AARCH64EB__
24884 return __builtin_shuffle (__a, __b, (uint16x8_t)
24885 {12, 4, 13, 5, 14, 6, 15, 7});
24886 #else
24887 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24888 #endif
24891 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24892 vzip1q_s32 (int32x4_t __a, int32x4_t __b)
24894 #ifdef __AARCH64EB__
24895 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24896 #else
24897 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
24898 #endif
24901 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24902 vzip1q_s64 (int64x2_t __a, int64x2_t __b)
24904 #ifdef __AARCH64EB__
24905 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24906 #else
24907 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24908 #endif
24911 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24912 vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
24914 #ifdef __AARCH64EB__
24915 return __builtin_shuffle (__a, __b, (uint8x16_t)
24916 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24917 #else
24918 return __builtin_shuffle (__a, __b, (uint8x16_t)
24919 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24920 #endif
24923 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24924 vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
24926 #ifdef __AARCH64EB__
24927 return __builtin_shuffle (__a, __b, (uint16x8_t)
24928 {12, 4, 13, 5, 14, 6, 15, 7});
24929 #else
24930 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24931 #endif
24934 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24935 vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
24937 #ifdef __AARCH64EB__
24938 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24939 #else
24940 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
24941 #endif
24944 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24945 vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
24947 #ifdef __AARCH64EB__
24948 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24949 #else
24950 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24951 #endif
24954 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24955 vzip2_f32 (float32x2_t __a, float32x2_t __b)
24957 #ifdef __AARCH64EB__
24958 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24959 #else
24960 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24961 #endif
24964 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24965 vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
24967 #ifdef __AARCH64EB__
24968 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
24969 #else
24970 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
24971 #endif
24974 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24975 vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
24977 #ifdef __AARCH64EB__
24978 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
24979 #else
24980 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
24981 #endif
24984 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24985 vzip2_s8 (int8x8_t __a, int8x8_t __b)
24987 #ifdef __AARCH64EB__
24988 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
24989 #else
24990 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
24991 #endif
24994 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24995 vzip2_s16 (int16x4_t __a, int16x4_t __b)
24997 #ifdef __AARCH64EB__
24998 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
24999 #else
25000 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25001 #endif
25004 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25005 vzip2_s32 (int32x2_t __a, int32x2_t __b)
25007 #ifdef __AARCH64EB__
25008 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25009 #else
25010 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25011 #endif
25014 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25015 vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
25017 #ifdef __AARCH64EB__
25018 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25019 #else
25020 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25021 #endif
25024 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25025 vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
25027 #ifdef __AARCH64EB__
25028 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25029 #else
25030 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25031 #endif
25034 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25035 vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
25037 #ifdef __AARCH64EB__
25038 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25039 #else
25040 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25041 #endif
25044 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25045 vzip2q_f32 (float32x4_t __a, float32x4_t __b)
25047 #ifdef __AARCH64EB__
25048 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25049 #else
25050 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25051 #endif
25054 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25055 vzip2q_f64 (float64x2_t __a, float64x2_t __b)
25057 #ifdef __AARCH64EB__
25058 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25059 #else
25060 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25061 #endif
25064 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25065 vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
25067 #ifdef __AARCH64EB__
25068 return __builtin_shuffle (__a, __b, (uint8x16_t)
25069 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25070 #else
25071 return __builtin_shuffle (__a, __b, (uint8x16_t)
25072 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25073 #endif
25076 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25077 vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
25079 #ifdef __AARCH64EB__
25080 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25081 #else
25082 return __builtin_shuffle (__a, __b, (uint16x8_t)
25083 {4, 12, 5, 13, 6, 14, 7, 15});
25084 #endif
25087 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25088 vzip2q_s8 (int8x16_t __a, int8x16_t __b)
25090 #ifdef __AARCH64EB__
25091 return __builtin_shuffle (__a, __b, (uint8x16_t)
25092 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25093 #else
25094 return __builtin_shuffle (__a, __b, (uint8x16_t)
25095 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25096 #endif
25099 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25100 vzip2q_s16 (int16x8_t __a, int16x8_t __b)
25102 #ifdef __AARCH64EB__
25103 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25104 #else
25105 return __builtin_shuffle (__a, __b, (uint16x8_t)
25106 {4, 12, 5, 13, 6, 14, 7, 15});
25107 #endif
25110 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25111 vzip2q_s32 (int32x4_t __a, int32x4_t __b)
25113 #ifdef __AARCH64EB__
25114 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25115 #else
25116 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25117 #endif
25120 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25121 vzip2q_s64 (int64x2_t __a, int64x2_t __b)
25123 #ifdef __AARCH64EB__
25124 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25125 #else
25126 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25127 #endif
25130 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25131 vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
25133 #ifdef __AARCH64EB__
25134 return __builtin_shuffle (__a, __b, (uint8x16_t)
25135 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25136 #else
25137 return __builtin_shuffle (__a, __b, (uint8x16_t)
25138 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25139 #endif
25142 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25143 vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
25145 #ifdef __AARCH64EB__
25146 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25147 #else
25148 return __builtin_shuffle (__a, __b, (uint16x8_t)
25149 {4, 12, 5, 13, 6, 14, 7, 15});
25150 #endif
25153 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25154 vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
25156 #ifdef __AARCH64EB__
25157 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25158 #else
25159 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25160 #endif
25163 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25164 vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
25166 #ifdef __AARCH64EB__
25167 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25168 #else
25169 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25170 #endif
25173 __INTERLEAVE_LIST (zip)
25175 #undef __INTERLEAVE_LIST
25176 #undef __DEFINTERLEAVE
25178 /* End of optimal implementations in approved order. */
25180 #undef __aarch64_vget_lane_any
25181 #undef __aarch64_vget_lane_f32
25182 #undef __aarch64_vget_lane_f64
25183 #undef __aarch64_vget_lane_p8
25184 #undef __aarch64_vget_lane_p16
25185 #undef __aarch64_vget_lane_s8
25186 #undef __aarch64_vget_lane_s16
25187 #undef __aarch64_vget_lane_s32
25188 #undef __aarch64_vget_lane_s64
25189 #undef __aarch64_vget_lane_u8
25190 #undef __aarch64_vget_lane_u16
25191 #undef __aarch64_vget_lane_u32
25192 #undef __aarch64_vget_lane_u64
25194 #undef __aarch64_vgetq_lane_f32
25195 #undef __aarch64_vgetq_lane_f64
25196 #undef __aarch64_vgetq_lane_p8
25197 #undef __aarch64_vgetq_lane_p16
25198 #undef __aarch64_vgetq_lane_s8
25199 #undef __aarch64_vgetq_lane_s16
25200 #undef __aarch64_vgetq_lane_s32
25201 #undef __aarch64_vgetq_lane_s64
25202 #undef __aarch64_vgetq_lane_u8
25203 #undef __aarch64_vgetq_lane_u16
25204 #undef __aarch64_vgetq_lane_u32
25205 #undef __aarch64_vgetq_lane_u64
25207 #undef __aarch64_vdup_lane_any
25208 #undef __aarch64_vdup_lane_f32
25209 #undef __aarch64_vdup_lane_f64
25210 #undef __aarch64_vdup_lane_p8
25211 #undef __aarch64_vdup_lane_p16
25212 #undef __aarch64_vdup_lane_s8
25213 #undef __aarch64_vdup_lane_s16
25214 #undef __aarch64_vdup_lane_s32
25215 #undef __aarch64_vdup_lane_s64
25216 #undef __aarch64_vdup_lane_u8
25217 #undef __aarch64_vdup_lane_u16
25218 #undef __aarch64_vdup_lane_u32
25219 #undef __aarch64_vdup_lane_u64
25220 #undef __aarch64_vdup_laneq_f32
25221 #undef __aarch64_vdup_laneq_f64
25222 #undef __aarch64_vdup_laneq_p8
25223 #undef __aarch64_vdup_laneq_p16
25224 #undef __aarch64_vdup_laneq_s8
25225 #undef __aarch64_vdup_laneq_s16
25226 #undef __aarch64_vdup_laneq_s32
25227 #undef __aarch64_vdup_laneq_s64
25228 #undef __aarch64_vdup_laneq_u8
25229 #undef __aarch64_vdup_laneq_u16
25230 #undef __aarch64_vdup_laneq_u32
25231 #undef __aarch64_vdup_laneq_u64
25232 #undef __aarch64_vdupq_lane_f32
25233 #undef __aarch64_vdupq_lane_f64
25234 #undef __aarch64_vdupq_lane_p8
25235 #undef __aarch64_vdupq_lane_p16
25236 #undef __aarch64_vdupq_lane_s8
25237 #undef __aarch64_vdupq_lane_s16
25238 #undef __aarch64_vdupq_lane_s32
25239 #undef __aarch64_vdupq_lane_s64
25240 #undef __aarch64_vdupq_lane_u8
25241 #undef __aarch64_vdupq_lane_u16
25242 #undef __aarch64_vdupq_lane_u32
25243 #undef __aarch64_vdupq_lane_u64
25244 #undef __aarch64_vdupq_laneq_f32
25245 #undef __aarch64_vdupq_laneq_f64
25246 #undef __aarch64_vdupq_laneq_p8
25247 #undef __aarch64_vdupq_laneq_p16
25248 #undef __aarch64_vdupq_laneq_s8
25249 #undef __aarch64_vdupq_laneq_s16
25250 #undef __aarch64_vdupq_laneq_s32
25251 #undef __aarch64_vdupq_laneq_s64
25252 #undef __aarch64_vdupq_laneq_u8
25253 #undef __aarch64_vdupq_laneq_u16
25254 #undef __aarch64_vdupq_laneq_u32
25255 #undef __aarch64_vdupq_laneq_u64
25257 #endif