[AArch64] Fix types for vqdmlals_lane_s32 and vqdmlsls_lane_s32 intrinsics.
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob0a86172ccb9aa9ab026f4aa020fd4418098e0923
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef __builtin_aarch64_simd_di int64x1_t
42 __attribute__ ((__vector_size__ (8)));
43 typedef int32_t int32x1_t;
44 typedef int16_t int16x1_t;
45 typedef int8_t int8x1_t;
46 typedef __builtin_aarch64_simd_df float64x1_t
47 __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_aarch64_simd_sf float32x2_t
49 __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_aarch64_simd_poly8 poly8x8_t
51 __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_aarch64_simd_poly16 poly16x4_t
53 __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_aarch64_simd_uqi uint8x8_t
55 __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_aarch64_simd_uhi uint16x4_t
57 __attribute__ ((__vector_size__ (8)));
58 typedef __builtin_aarch64_simd_usi uint32x2_t
59 __attribute__ ((__vector_size__ (8)));
60 typedef __builtin_aarch64_simd_udi uint64x1_t
61 __attribute__ ((__vector_size__ (8)));
62 typedef uint32_t uint32x1_t;
63 typedef uint16_t uint16x1_t;
64 typedef uint8_t uint8x1_t;
65 typedef __builtin_aarch64_simd_qi int8x16_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_hi int16x8_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_si int32x4_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_di int64x2_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_sf float32x4_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_df float64x2_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_poly8 poly8x16_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_poly16 poly16x8_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_poly64 poly64x2_t
82 __attribute__ ((__vector_size__ (16)));
83 typedef __builtin_aarch64_simd_uqi uint8x16_t
84 __attribute__ ((__vector_size__ (16)));
85 typedef __builtin_aarch64_simd_uhi uint16x8_t
86 __attribute__ ((__vector_size__ (16)));
87 typedef __builtin_aarch64_simd_usi uint32x4_t
88 __attribute__ ((__vector_size__ (16)));
89 typedef __builtin_aarch64_simd_udi uint64x2_t
90 __attribute__ ((__vector_size__ (16)));
92 typedef float float32_t;
93 typedef double float64_t;
94 typedef __builtin_aarch64_simd_poly8 poly8_t;
95 typedef __builtin_aarch64_simd_poly16 poly16_t;
96 typedef __builtin_aarch64_simd_poly64 poly64_t;
97 typedef __builtin_aarch64_simd_poly128 poly128_t;
99 typedef struct int8x8x2_t
101 int8x8_t val[2];
102 } int8x8x2_t;
104 typedef struct int8x16x2_t
106 int8x16_t val[2];
107 } int8x16x2_t;
109 typedef struct int16x4x2_t
111 int16x4_t val[2];
112 } int16x4x2_t;
114 typedef struct int16x8x2_t
116 int16x8_t val[2];
117 } int16x8x2_t;
119 typedef struct int32x2x2_t
121 int32x2_t val[2];
122 } int32x2x2_t;
124 typedef struct int32x4x2_t
126 int32x4_t val[2];
127 } int32x4x2_t;
129 typedef struct int64x1x2_t
131 int64x1_t val[2];
132 } int64x1x2_t;
134 typedef struct int64x2x2_t
136 int64x2_t val[2];
137 } int64x2x2_t;
139 typedef struct uint8x8x2_t
141 uint8x8_t val[2];
142 } uint8x8x2_t;
144 typedef struct uint8x16x2_t
146 uint8x16_t val[2];
147 } uint8x16x2_t;
149 typedef struct uint16x4x2_t
151 uint16x4_t val[2];
152 } uint16x4x2_t;
154 typedef struct uint16x8x2_t
156 uint16x8_t val[2];
157 } uint16x8x2_t;
159 typedef struct uint32x2x2_t
161 uint32x2_t val[2];
162 } uint32x2x2_t;
164 typedef struct uint32x4x2_t
166 uint32x4_t val[2];
167 } uint32x4x2_t;
169 typedef struct uint64x1x2_t
171 uint64x1_t val[2];
172 } uint64x1x2_t;
174 typedef struct uint64x2x2_t
176 uint64x2_t val[2];
177 } uint64x2x2_t;
179 typedef struct float32x2x2_t
181 float32x2_t val[2];
182 } float32x2x2_t;
184 typedef struct float32x4x2_t
186 float32x4_t val[2];
187 } float32x4x2_t;
189 typedef struct float64x2x2_t
191 float64x2_t val[2];
192 } float64x2x2_t;
194 typedef struct float64x1x2_t
196 float64x1_t val[2];
197 } float64x1x2_t;
199 typedef struct poly8x8x2_t
201 poly8x8_t val[2];
202 } poly8x8x2_t;
204 typedef struct poly8x16x2_t
206 poly8x16_t val[2];
207 } poly8x16x2_t;
209 typedef struct poly16x4x2_t
211 poly16x4_t val[2];
212 } poly16x4x2_t;
214 typedef struct poly16x8x2_t
216 poly16x8_t val[2];
217 } poly16x8x2_t;
219 typedef struct int8x8x3_t
221 int8x8_t val[3];
222 } int8x8x3_t;
224 typedef struct int8x16x3_t
226 int8x16_t val[3];
227 } int8x16x3_t;
229 typedef struct int16x4x3_t
231 int16x4_t val[3];
232 } int16x4x3_t;
234 typedef struct int16x8x3_t
236 int16x8_t val[3];
237 } int16x8x3_t;
239 typedef struct int32x2x3_t
241 int32x2_t val[3];
242 } int32x2x3_t;
244 typedef struct int32x4x3_t
246 int32x4_t val[3];
247 } int32x4x3_t;
249 typedef struct int64x1x3_t
251 int64x1_t val[3];
252 } int64x1x3_t;
254 typedef struct int64x2x3_t
256 int64x2_t val[3];
257 } int64x2x3_t;
259 typedef struct uint8x8x3_t
261 uint8x8_t val[3];
262 } uint8x8x3_t;
264 typedef struct uint8x16x3_t
266 uint8x16_t val[3];
267 } uint8x16x3_t;
269 typedef struct uint16x4x3_t
271 uint16x4_t val[3];
272 } uint16x4x3_t;
274 typedef struct uint16x8x3_t
276 uint16x8_t val[3];
277 } uint16x8x3_t;
279 typedef struct uint32x2x3_t
281 uint32x2_t val[3];
282 } uint32x2x3_t;
284 typedef struct uint32x4x3_t
286 uint32x4_t val[3];
287 } uint32x4x3_t;
289 typedef struct uint64x1x3_t
291 uint64x1_t val[3];
292 } uint64x1x3_t;
294 typedef struct uint64x2x3_t
296 uint64x2_t val[3];
297 } uint64x2x3_t;
299 typedef struct float32x2x3_t
301 float32x2_t val[3];
302 } float32x2x3_t;
304 typedef struct float32x4x3_t
306 float32x4_t val[3];
307 } float32x4x3_t;
309 typedef struct float64x2x3_t
311 float64x2_t val[3];
312 } float64x2x3_t;
314 typedef struct float64x1x3_t
316 float64x1_t val[3];
317 } float64x1x3_t;
319 typedef struct poly8x8x3_t
321 poly8x8_t val[3];
322 } poly8x8x3_t;
324 typedef struct poly8x16x3_t
326 poly8x16_t val[3];
327 } poly8x16x3_t;
329 typedef struct poly16x4x3_t
331 poly16x4_t val[3];
332 } poly16x4x3_t;
334 typedef struct poly16x8x3_t
336 poly16x8_t val[3];
337 } poly16x8x3_t;
339 typedef struct int8x8x4_t
341 int8x8_t val[4];
342 } int8x8x4_t;
344 typedef struct int8x16x4_t
346 int8x16_t val[4];
347 } int8x16x4_t;
349 typedef struct int16x4x4_t
351 int16x4_t val[4];
352 } int16x4x4_t;
354 typedef struct int16x8x4_t
356 int16x8_t val[4];
357 } int16x8x4_t;
359 typedef struct int32x2x4_t
361 int32x2_t val[4];
362 } int32x2x4_t;
364 typedef struct int32x4x4_t
366 int32x4_t val[4];
367 } int32x4x4_t;
369 typedef struct int64x1x4_t
371 int64x1_t val[4];
372 } int64x1x4_t;
374 typedef struct int64x2x4_t
376 int64x2_t val[4];
377 } int64x2x4_t;
379 typedef struct uint8x8x4_t
381 uint8x8_t val[4];
382 } uint8x8x4_t;
384 typedef struct uint8x16x4_t
386 uint8x16_t val[4];
387 } uint8x16x4_t;
389 typedef struct uint16x4x4_t
391 uint16x4_t val[4];
392 } uint16x4x4_t;
394 typedef struct uint16x8x4_t
396 uint16x8_t val[4];
397 } uint16x8x4_t;
399 typedef struct uint32x2x4_t
401 uint32x2_t val[4];
402 } uint32x2x4_t;
404 typedef struct uint32x4x4_t
406 uint32x4_t val[4];
407 } uint32x4x4_t;
409 typedef struct uint64x1x4_t
411 uint64x1_t val[4];
412 } uint64x1x4_t;
414 typedef struct uint64x2x4_t
416 uint64x2_t val[4];
417 } uint64x2x4_t;
419 typedef struct float32x2x4_t
421 float32x2_t val[4];
422 } float32x2x4_t;
424 typedef struct float32x4x4_t
426 float32x4_t val[4];
427 } float32x4x4_t;
429 typedef struct float64x2x4_t
431 float64x2_t val[4];
432 } float64x2x4_t;
434 typedef struct float64x1x4_t
436 float64x1_t val[4];
437 } float64x1x4_t;
439 typedef struct poly8x8x4_t
441 poly8x8_t val[4];
442 } poly8x8x4_t;
444 typedef struct poly8x16x4_t
446 poly8x16_t val[4];
447 } poly8x16x4_t;
449 typedef struct poly16x4x4_t
451 poly16x4_t val[4];
452 } poly16x4x4_t;
454 typedef struct poly16x8x4_t
456 poly16x8_t val[4];
457 } poly16x8x4_t;
/* vget_lane internal macros.  Extract lane __b from D-register vector __a.
   The 64-bit element variants (f64/s64/u64) index the single lane directly
   after a bounds check; the others go through the big-endian-aware builtin.  */

#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret \
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
#define __aarch64_vget_lane_f64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })
/* vgetq_lane internal macros: Q-register (128-bit) lane extraction.  */
#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros.  Broadcast lane __b of __a:
   __q1 selects the result width (D vs Q), __q2 the source width.  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , , __a, __b)
/* __aarch64_vdup_laneq internal macros: D-register result from a Q-register
   source lane.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , q, __a, __b)
/* __aarch64_vdupq_lane internal macros: Q-register result from a D-register
   source lane.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, , __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, , __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, , __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, , __a, __b)
/* __aarch64_vdupq_laneq internal macros: Q-register result from a Q-register
   source lane.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, q, __a, __b)
637 /* vadd */
638 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
639 vadd_s8 (int8x8_t __a, int8x8_t __b)
641 return __a + __b;
644 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
645 vadd_s16 (int16x4_t __a, int16x4_t __b)
647 return __a + __b;
650 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
651 vadd_s32 (int32x2_t __a, int32x2_t __b)
653 return __a + __b;
656 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
657 vadd_f32 (float32x2_t __a, float32x2_t __b)
659 return __a + __b;
662 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
663 vadd_f64 (float64x1_t __a, float64x1_t __b)
665 return __a + __b;
668 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
669 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
671 return __a + __b;
674 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
675 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
677 return __a + __b;
680 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
681 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
683 return __a + __b;
686 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
687 vadd_s64 (int64x1_t __a, int64x1_t __b)
689 return __a + __b;
692 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
693 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
695 return __a + __b;
698 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
699 vaddq_s8 (int8x16_t __a, int8x16_t __b)
701 return __a + __b;
704 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
705 vaddq_s16 (int16x8_t __a, int16x8_t __b)
707 return __a + __b;
710 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
711 vaddq_s32 (int32x4_t __a, int32x4_t __b)
713 return __a + __b;
716 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
717 vaddq_s64 (int64x2_t __a, int64x2_t __b)
719 return __a + __b;
722 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
723 vaddq_f32 (float32x4_t __a, float32x4_t __b)
725 return __a + __b;
728 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
729 vaddq_f64 (float64x2_t __a, float64x2_t __b)
731 return __a + __b;
734 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
735 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
737 return __a + __b;
740 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
741 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
743 return __a + __b;
746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
747 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
749 return __a + __b;
752 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
753 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
755 return __a + __b;
758 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
759 vaddl_s8 (int8x8_t __a, int8x8_t __b)
761 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
764 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
765 vaddl_s16 (int16x4_t __a, int16x4_t __b)
767 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
770 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
771 vaddl_s32 (int32x2_t __a, int32x2_t __b)
773 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
776 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
777 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
779 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
780 (int8x8_t) __b);
783 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
784 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
786 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
787 (int16x4_t) __b);
790 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
791 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
793 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
794 (int32x2_t) __b);
797 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
798 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
800 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
803 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
804 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
806 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
809 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
810 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
812 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
815 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
816 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
818 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
819 (int8x16_t) __b);
822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
823 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
825 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
826 (int16x8_t) __b);
829 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
830 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
832 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
833 (int32x4_t) __b);
836 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
837 vaddw_s8 (int16x8_t __a, int8x8_t __b)
839 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
842 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
843 vaddw_s16 (int32x4_t __a, int16x4_t __b)
845 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
848 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
849 vaddw_s32 (int64x2_t __a, int32x2_t __b)
851 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
854 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
855 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
857 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
858 (int8x8_t) __b);
861 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
862 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
864 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
865 (int16x4_t) __b);
868 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
869 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
871 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
872 (int32x2_t) __b);
875 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
876 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
878 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
881 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
882 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
884 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
887 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
888 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
890 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
893 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
894 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
896 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
897 (int8x16_t) __b);
900 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
901 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
903 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
904 (int16x8_t) __b);
907 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
908 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
910 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
911 (int32x4_t) __b);
914 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
915 vhadd_s8 (int8x8_t __a, int8x8_t __b)
917 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
920 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
921 vhadd_s16 (int16x4_t __a, int16x4_t __b)
923 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
926 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
927 vhadd_s32 (int32x2_t __a, int32x2_t __b)
929 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
932 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
933 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
935 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
936 (int8x8_t) __b);
939 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
940 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
942 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
943 (int16x4_t) __b);
946 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
947 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
949 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
950 (int32x2_t) __b);
953 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
954 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
956 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
959 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
960 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
962 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
965 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
966 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
968 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
971 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
972 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
974 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
975 (int8x16_t) __b);
978 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
979 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
981 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
982 (int16x8_t) __b);
985 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
986 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
988 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
989 (int32x4_t) __b);
992 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
993 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
995 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
998 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
999 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
1001 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
1004 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1005 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
1007 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
1010 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1011 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
1013 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
1014 (int8x8_t) __b);
1017 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1018 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
1020 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1021 (int16x4_t) __b);
1024 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1025 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1027 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1028 (int32x2_t) __b);
1031 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1032 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1034 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1038 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1040 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1043 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1044 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1046 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1049 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1050 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1052 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1053 (int8x16_t) __b);
1056 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1057 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1059 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1060 (int16x8_t) __b);
1063 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1064 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1066 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1067 (int32x4_t) __b);
1070 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1071 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1073 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1076 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1077 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1079 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1082 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1083 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1085 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1088 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1089 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1091 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1092 (int16x8_t) __b);
1095 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1096 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1098 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1099 (int32x4_t) __b);
1102 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1103 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1105 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1106 (int64x2_t) __b);
1109 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1110 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1112 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1115 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1116 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1118 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1121 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1122 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1124 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1127 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1128 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1130 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1131 (int16x8_t) __b);
1134 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1135 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1137 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1138 (int32x4_t) __b);
1141 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1142 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1144 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1145 (int64x2_t) __b);
1148 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1149 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1151 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1154 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1155 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1157 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1160 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1161 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1163 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1166 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1167 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1169 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1170 (int16x8_t) __b,
1171 (int16x8_t) __c);
1174 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1175 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1177 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1178 (int32x4_t) __b,
1179 (int32x4_t) __c);
1182 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1183 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1185 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1186 (int64x2_t) __b,
1187 (int64x2_t) __c);
1190 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1191 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1193 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1196 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1197 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1199 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1202 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1203 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1205 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1208 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1209 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1211 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1212 (int16x8_t) __b,
1213 (int16x8_t) __c);
1216 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1217 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1219 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1220 (int32x4_t) __b,
1221 (int32x4_t) __c);
1224 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1225 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1227 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1228 (int64x2_t) __b,
1229 (int64x2_t) __c);
1232 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1233 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1235 return __a / __b;
1238 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1239 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1241 return __a / __b;
1244 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1245 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1247 return __a / __b;
1250 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1251 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1253 return __a / __b;
1256 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1257 vmul_s8 (int8x8_t __a, int8x8_t __b)
1259 return __a * __b;
1262 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1263 vmul_s16 (int16x4_t __a, int16x4_t __b)
1265 return __a * __b;
1268 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1269 vmul_s32 (int32x2_t __a, int32x2_t __b)
1271 return __a * __b;
1274 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1275 vmul_f32 (float32x2_t __a, float32x2_t __b)
1277 return __a * __b;
1280 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1281 vmul_f64 (float64x1_t __a, float64x1_t __b)
1283 return __a * __b;
1286 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1287 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1289 return __a * __b;
1292 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1293 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1295 return __a * __b;
1298 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1299 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1301 return __a * __b;
1304 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1305 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1307 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1308 (int8x8_t) __b);
1311 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1312 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1314 return __a * __b;
1317 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1318 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1320 return __a * __b;
1323 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1324 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1326 return __a * __b;
1329 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1330 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1332 return __a * __b;
1335 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1336 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1338 return __a * __b;
1341 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1342 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1344 return __a * __b;
1347 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1348 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1350 return __a * __b;
1353 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1354 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1356 return __a * __b;
1359 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1360 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1362 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1363 (int8x16_t) __b);
1366 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1367 vand_s8 (int8x8_t __a, int8x8_t __b)
1369 return __a & __b;
1372 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1373 vand_s16 (int16x4_t __a, int16x4_t __b)
1375 return __a & __b;
1378 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1379 vand_s32 (int32x2_t __a, int32x2_t __b)
1381 return __a & __b;
1384 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1385 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1387 return __a & __b;
1390 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1391 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1393 return __a & __b;
1396 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1397 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1399 return __a & __b;
1402 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1403 vand_s64 (int64x1_t __a, int64x1_t __b)
1405 return __a & __b;
1408 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1409 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1411 return __a & __b;
1414 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1415 vandq_s8 (int8x16_t __a, int8x16_t __b)
1417 return __a & __b;
1420 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1421 vandq_s16 (int16x8_t __a, int16x8_t __b)
1423 return __a & __b;
1426 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1427 vandq_s32 (int32x4_t __a, int32x4_t __b)
1429 return __a & __b;
1432 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1433 vandq_s64 (int64x2_t __a, int64x2_t __b)
1435 return __a & __b;
1438 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1439 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1441 return __a & __b;
1444 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1445 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1447 return __a & __b;
1450 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1451 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1453 return __a & __b;
1456 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1457 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1459 return __a & __b;
1462 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1463 vorr_s8 (int8x8_t __a, int8x8_t __b)
1465 return __a | __b;
1468 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1469 vorr_s16 (int16x4_t __a, int16x4_t __b)
1471 return __a | __b;
1474 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1475 vorr_s32 (int32x2_t __a, int32x2_t __b)
1477 return __a | __b;
1480 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1481 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1483 return __a | __b;
1486 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1487 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1489 return __a | __b;
1492 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1493 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1495 return __a | __b;
1498 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1499 vorr_s64 (int64x1_t __a, int64x1_t __b)
1501 return __a | __b;
1504 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1505 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1507 return __a | __b;
1510 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1511 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1513 return __a | __b;
1516 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1517 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1519 return __a | __b;
1522 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1523 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1525 return __a | __b;
1528 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1529 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1531 return __a | __b;
1534 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1535 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1537 return __a | __b;
1540 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1541 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1543 return __a | __b;
1546 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1547 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1549 return __a | __b;
1552 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1553 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1555 return __a | __b;
1558 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1559 veor_s8 (int8x8_t __a, int8x8_t __b)
1561 return __a ^ __b;
1564 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1565 veor_s16 (int16x4_t __a, int16x4_t __b)
1567 return __a ^ __b;
1570 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1571 veor_s32 (int32x2_t __a, int32x2_t __b)
1573 return __a ^ __b;
1576 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1577 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1579 return __a ^ __b;
1582 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1583 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1585 return __a ^ __b;
1588 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1589 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1591 return __a ^ __b;
1594 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1595 veor_s64 (int64x1_t __a, int64x1_t __b)
1597 return __a ^ __b;
1600 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1601 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1603 return __a ^ __b;
1606 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1607 veorq_s8 (int8x16_t __a, int8x16_t __b)
1609 return __a ^ __b;
1612 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1613 veorq_s16 (int16x8_t __a, int16x8_t __b)
1615 return __a ^ __b;
1618 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1619 veorq_s32 (int32x4_t __a, int32x4_t __b)
1621 return __a ^ __b;
1624 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1625 veorq_s64 (int64x2_t __a, int64x2_t __b)
1627 return __a ^ __b;
1630 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1631 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1633 return __a ^ __b;
1636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1637 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1639 return __a ^ __b;
1642 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1643 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1645 return __a ^ __b;
1648 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1649 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1651 return __a ^ __b;
1654 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1655 vbic_s8 (int8x8_t __a, int8x8_t __b)
1657 return __a & ~__b;
1660 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1661 vbic_s16 (int16x4_t __a, int16x4_t __b)
1663 return __a & ~__b;
1666 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1667 vbic_s32 (int32x2_t __a, int32x2_t __b)
1669 return __a & ~__b;
1672 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1673 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1675 return __a & ~__b;
1678 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1679 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1681 return __a & ~__b;
1684 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1685 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1687 return __a & ~__b;
1690 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1691 vbic_s64 (int64x1_t __a, int64x1_t __b)
1693 return __a & ~__b;
1696 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1697 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1699 return __a & ~__b;
1702 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1703 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1705 return __a & ~__b;
1708 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1709 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1711 return __a & ~__b;
1714 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1715 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1717 return __a & ~__b;
1720 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1721 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1723 return __a & ~__b;
1726 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1727 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1729 return __a & ~__b;
1732 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1733 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1735 return __a & ~__b;
1738 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1739 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1741 return __a & ~__b;
1744 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1745 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1747 return __a & ~__b;
1750 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1751 vorn_s8 (int8x8_t __a, int8x8_t __b)
1753 return __a | ~__b;
1756 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1757 vorn_s16 (int16x4_t __a, int16x4_t __b)
1759 return __a | ~__b;
1762 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1763 vorn_s32 (int32x2_t __a, int32x2_t __b)
1765 return __a | ~__b;
1768 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1769 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1771 return __a | ~__b;
1774 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1775 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1777 return __a | ~__b;
1780 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1781 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1783 return __a | ~__b;
1786 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1787 vorn_s64 (int64x1_t __a, int64x1_t __b)
1789 return __a | ~__b;
1792 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1793 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1795 return __a | ~__b;
1798 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1799 vornq_s8 (int8x16_t __a, int8x16_t __b)
1801 return __a | ~__b;
1804 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1805 vornq_s16 (int16x8_t __a, int16x8_t __b)
1807 return __a | ~__b;
1810 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1811 vornq_s32 (int32x4_t __a, int32x4_t __b)
1813 return __a | ~__b;
1816 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1817 vornq_s64 (int64x2_t __a, int64x2_t __b)
1819 return __a | ~__b;
1822 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1823 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1825 return __a | ~__b;
1828 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1829 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1831 return __a | ~__b;
1834 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1835 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1837 return __a | ~__b;
1840 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1841 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1843 return __a | ~__b;
1846 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1847 vsub_s8 (int8x8_t __a, int8x8_t __b)
1849 return __a - __b;
1852 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1853 vsub_s16 (int16x4_t __a, int16x4_t __b)
1855 return __a - __b;
1858 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1859 vsub_s32 (int32x2_t __a, int32x2_t __b)
1861 return __a - __b;
1864 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1865 vsub_f32 (float32x2_t __a, float32x2_t __b)
1867 return __a - __b;
1870 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1871 vsub_f64 (float64x1_t __a, float64x1_t __b)
1873 return __a - __b;
1876 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1877 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1879 return __a - __b;
1882 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1883 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1885 return __a - __b;
1888 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1889 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1891 return __a - __b;
1894 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1895 vsub_s64 (int64x1_t __a, int64x1_t __b)
1897 return __a - __b;
1900 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1901 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1903 return __a - __b;
1906 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1907 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1909 return __a - __b;
1912 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1913 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1915 return __a - __b;
1918 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1919 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1921 return __a - __b;
1924 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1925 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1927 return __a - __b;
1930 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1931 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1933 return __a - __b;
1936 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1937 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1939 return __a - __b;
1942 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1943 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1945 return __a - __b;
1948 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1949 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1951 return __a - __b;
1954 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1955 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1957 return __a - __b;
1960 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1961 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1963 return __a - __b;
1966 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1967 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1969 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1972 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1973 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1975 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1978 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1979 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1981 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1984 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1985 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1987 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1988 (int8x8_t) __b);
1991 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1992 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1994 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1995 (int16x4_t) __b);
1998 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1999 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
2001 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
2002 (int32x2_t) __b);
2005 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2006 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
2008 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
2011 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2012 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
2014 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
2017 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2018 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
2020 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
2023 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2024 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
2026 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2027 (int8x16_t) __b);
2030 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2031 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2033 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2034 (int16x8_t) __b);
2037 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2038 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2040 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2041 (int32x4_t) __b);
2044 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2045 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2047 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2050 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2051 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2053 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2056 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2057 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2059 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2062 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2063 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2065 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2066 (int8x8_t) __b);
2069 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2070 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2072 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2073 (int16x4_t) __b);
2076 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2077 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2079 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2080 (int32x2_t) __b);
2083 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2084 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2086 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2089 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2090 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2092 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2095 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2096 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2098 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2101 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2102 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2104 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2105 (int8x16_t) __b);
2108 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2109 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2111 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2112 (int16x8_t) __b);
2115 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2116 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2118 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2119 (int32x4_t) __b);
2122 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2123 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2125 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2128 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2129 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2131 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2134 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2135 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2137 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2140 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2141 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2143 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2146 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2147 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2149 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2152 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2153 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2155 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2158 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2159 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2161 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2164 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2165 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2167 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2170 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2171 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2173 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2176 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2177 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2179 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2182 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2183 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2185 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2188 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2189 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2191 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2194 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2195 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2197 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2200 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2201 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2203 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2206 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2207 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2209 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2212 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2213 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2215 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
2218 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2219 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2221 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2224 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2225 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2227 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2230 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2231 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2233 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2236 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2237 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2239 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2242 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2243 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2245 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2248 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2249 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2251 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2254 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2255 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2257 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2260 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2261 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2263 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2266 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2267 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2269 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2272 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2273 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2275 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2278 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2279 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2281 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2284 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2285 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2287 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2290 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2291 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2293 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2296 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2297 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2299 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2302 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2303 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2305 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2308 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2309 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2311 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
2314 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2315 vqneg_s8 (int8x8_t __a)
2317 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2320 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2321 vqneg_s16 (int16x4_t __a)
2323 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2326 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2327 vqneg_s32 (int32x2_t __a)
2329 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2332 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2333 vqneg_s64 (int64x1_t __a)
2335 return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
2338 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2339 vqnegq_s8 (int8x16_t __a)
2341 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2344 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2345 vqnegq_s16 (int16x8_t __a)
2347 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2350 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2351 vqnegq_s32 (int32x4_t __a)
2353 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2356 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2357 vqabs_s8 (int8x8_t __a)
2359 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2362 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2363 vqabs_s16 (int16x4_t __a)
2365 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2368 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2369 vqabs_s32 (int32x2_t __a)
2371 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2374 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2375 vqabs_s64 (int64x1_t __a)
2377 return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
2380 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2381 vqabsq_s8 (int8x16_t __a)
2383 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2386 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2387 vqabsq_s16 (int16x8_t __a)
2389 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2392 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2393 vqabsq_s32 (int32x4_t __a)
2395 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2398 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2399 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2401 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2404 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2405 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2407 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2410 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2411 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2413 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2416 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2417 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2419 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2422 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2423 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2425 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2428 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2429 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2431 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2434 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2435 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2437 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2440 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2441 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2443 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2446 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2447 vcreate_s8 (uint64_t __a)
2449 return (int8x8_t) __a;
2452 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2453 vcreate_s16 (uint64_t __a)
2455 return (int16x4_t) __a;
2458 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2459 vcreate_s32 (uint64_t __a)
2461 return (int32x2_t) __a;
2464 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2465 vcreate_s64 (uint64_t __a)
2467 return (int64x1_t) {__a};
2470 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2471 vcreate_f32 (uint64_t __a)
2473 return (float32x2_t) __a;
2476 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2477 vcreate_u8 (uint64_t __a)
2479 return (uint8x8_t) __a;
2482 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2483 vcreate_u16 (uint64_t __a)
2485 return (uint16x4_t) __a;
2488 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2489 vcreate_u32 (uint64_t __a)
2491 return (uint32x2_t) __a;
2494 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2495 vcreate_u64 (uint64_t __a)
2497 return (uint64x1_t) {__a};
2500 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2501 vcreate_f64 (uint64_t __a)
2503 return __builtin_aarch64_createv1df (__a);
2506 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2507 vcreate_p8 (uint64_t __a)
2509 return (poly8x8_t) __a;
2512 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2513 vcreate_p16 (uint64_t __a)
2515 return (poly16x4_t) __a;
2518 /* vget_lane */
2520 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2521 vget_lane_f32 (float32x2_t __a, const int __b)
2523 return __aarch64_vget_lane_f32 (__a, __b);
2526 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2527 vget_lane_f64 (float64x1_t __a, const int __b)
2529 return __aarch64_vget_lane_f64 (__a, __b);
2532 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2533 vget_lane_p8 (poly8x8_t __a, const int __b)
2535 return __aarch64_vget_lane_p8 (__a, __b);
2538 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2539 vget_lane_p16 (poly16x4_t __a, const int __b)
2541 return __aarch64_vget_lane_p16 (__a, __b);
2544 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2545 vget_lane_s8 (int8x8_t __a, const int __b)
2547 return __aarch64_vget_lane_s8 (__a, __b);
2550 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2551 vget_lane_s16 (int16x4_t __a, const int __b)
2553 return __aarch64_vget_lane_s16 (__a, __b);
2556 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2557 vget_lane_s32 (int32x2_t __a, const int __b)
2559 return __aarch64_vget_lane_s32 (__a, __b);
2562 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2563 vget_lane_s64 (int64x1_t __a, const int __b)
2565 return __aarch64_vget_lane_s64 (__a, __b);
2568 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2569 vget_lane_u8 (uint8x8_t __a, const int __b)
2571 return __aarch64_vget_lane_u8 (__a, __b);
2574 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2575 vget_lane_u16 (uint16x4_t __a, const int __b)
2577 return __aarch64_vget_lane_u16 (__a, __b);
2580 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2581 vget_lane_u32 (uint32x2_t __a, const int __b)
2583 return __aarch64_vget_lane_u32 (__a, __b);
2586 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2587 vget_lane_u64 (uint64x1_t __a, const int __b)
2589 return __aarch64_vget_lane_u64 (__a, __b);
2592 /* vgetq_lane */
2594 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2595 vgetq_lane_f32 (float32x4_t __a, const int __b)
2597 return __aarch64_vgetq_lane_f32 (__a, __b);
2600 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2601 vgetq_lane_f64 (float64x2_t __a, const int __b)
2603 return __aarch64_vgetq_lane_f64 (__a, __b);
2606 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2607 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2609 return __aarch64_vgetq_lane_p8 (__a, __b);
2612 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2613 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2615 return __aarch64_vgetq_lane_p16 (__a, __b);
2618 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2619 vgetq_lane_s8 (int8x16_t __a, const int __b)
2621 return __aarch64_vgetq_lane_s8 (__a, __b);
2624 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2625 vgetq_lane_s16 (int16x8_t __a, const int __b)
2627 return __aarch64_vgetq_lane_s16 (__a, __b);
2630 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2631 vgetq_lane_s32 (int32x4_t __a, const int __b)
2633 return __aarch64_vgetq_lane_s32 (__a, __b);
2636 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2637 vgetq_lane_s64 (int64x2_t __a, const int __b)
2639 return __aarch64_vgetq_lane_s64 (__a, __b);
2642 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2643 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2645 return __aarch64_vgetq_lane_u8 (__a, __b);
2648 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2649 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2651 return __aarch64_vgetq_lane_u16 (__a, __b);
2654 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2655 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2657 return __aarch64_vgetq_lane_u32 (__a, __b);
2660 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2661 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2663 return __aarch64_vgetq_lane_u64 (__a, __b);
2666 /* vreinterpret */
2668 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2669 vreinterpret_p8_f64 (float64x1_t __a)
2671 return __builtin_aarch64_reinterpretv8qiv1df_ps (__a);
2674 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2675 vreinterpret_p8_s8 (int8x8_t __a)
2677 return (poly8x8_t) __a;
2680 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2681 vreinterpret_p8_s16 (int16x4_t __a)
2683 return (poly8x8_t) __a;
2686 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2687 vreinterpret_p8_s32 (int32x2_t __a)
2689 return (poly8x8_t) __a;
2692 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2693 vreinterpret_p8_s64 (int64x1_t __a)
2695 return (poly8x8_t) __a;
2698 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2699 vreinterpret_p8_f32 (float32x2_t __a)
2701 return (poly8x8_t) __a;
2704 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2705 vreinterpret_p8_u8 (uint8x8_t __a)
2707 return (poly8x8_t) __a;
2710 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2711 vreinterpret_p8_u16 (uint16x4_t __a)
2713 return (poly8x8_t) __a;
2716 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2717 vreinterpret_p8_u32 (uint32x2_t __a)
2719 return (poly8x8_t) __a;
2722 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2723 vreinterpret_p8_u64 (uint64x1_t __a)
2725 return (poly8x8_t) __a;
2728 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2729 vreinterpret_p8_p16 (poly16x4_t __a)
2731 return (poly8x8_t) __a;
2734 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2735 vreinterpretq_p8_f64 (float64x2_t __a)
2737 return (poly8x16_t) __a;
2740 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2741 vreinterpretq_p8_s8 (int8x16_t __a)
2743 return (poly8x16_t) __a;
2746 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2747 vreinterpretq_p8_s16 (int16x8_t __a)
2749 return (poly8x16_t) __a;
2752 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2753 vreinterpretq_p8_s32 (int32x4_t __a)
2755 return (poly8x16_t) __a;
2758 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2759 vreinterpretq_p8_s64 (int64x2_t __a)
2761 return (poly8x16_t) __a;
2764 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2765 vreinterpretq_p8_f32 (float32x4_t __a)
2767 return (poly8x16_t) __a;
2770 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2771 vreinterpretq_p8_u8 (uint8x16_t __a)
2773 return (poly8x16_t) __a;
2776 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2777 vreinterpretq_p8_u16 (uint16x8_t __a)
2779 return (poly8x16_t) __a;
2782 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2783 vreinterpretq_p8_u32 (uint32x4_t __a)
2785 return (poly8x16_t) __a;
2788 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2789 vreinterpretq_p8_u64 (uint64x2_t __a)
2791 return (poly8x16_t) __a;
2794 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2795 vreinterpretq_p8_p16 (poly16x8_t __a)
2797 return (poly8x16_t) __a;
2800 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2801 vreinterpret_p16_f64 (float64x1_t __a)
2803 return __builtin_aarch64_reinterpretv4hiv1df_ps (__a);
2806 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2807 vreinterpret_p16_s8 (int8x8_t __a)
2809 return (poly16x4_t) __a;
2812 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2813 vreinterpret_p16_s16 (int16x4_t __a)
2815 return (poly16x4_t) __a;
2818 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2819 vreinterpret_p16_s32 (int32x2_t __a)
2821 return (poly16x4_t) __a;
2824 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2825 vreinterpret_p16_s64 (int64x1_t __a)
2827 return (poly16x4_t) __a;
2830 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2831 vreinterpret_p16_f32 (float32x2_t __a)
2833 return (poly16x4_t) __a;
2836 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2837 vreinterpret_p16_u8 (uint8x8_t __a)
2839 return (poly16x4_t) __a;
2842 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2843 vreinterpret_p16_u16 (uint16x4_t __a)
2845 return (poly16x4_t) __a;
2848 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2849 vreinterpret_p16_u32 (uint32x2_t __a)
2851 return (poly16x4_t) __a;
2854 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2855 vreinterpret_p16_u64 (uint64x1_t __a)
2857 return (poly16x4_t) __a;
2860 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2861 vreinterpret_p16_p8 (poly8x8_t __a)
2863 return (poly16x4_t) __a;
2866 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2867 vreinterpretq_p16_f64 (float64x2_t __a)
2869 return (poly16x8_t) __a;
2872 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2873 vreinterpretq_p16_s8 (int8x16_t __a)
2875 return (poly16x8_t) __a;
2878 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2879 vreinterpretq_p16_s16 (int16x8_t __a)
2881 return (poly16x8_t) __a;
2884 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2885 vreinterpretq_p16_s32 (int32x4_t __a)
2887 return (poly16x8_t) __a;
2890 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2891 vreinterpretq_p16_s64 (int64x2_t __a)
2893 return (poly16x8_t) __a;
2896 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2897 vreinterpretq_p16_f32 (float32x4_t __a)
2899 return (poly16x8_t) __a;
2902 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2903 vreinterpretq_p16_u8 (uint8x16_t __a)
2905 return (poly16x8_t) __a;
2908 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2909 vreinterpretq_p16_u16 (uint16x8_t __a)
2911 return (poly16x8_t) __a;
2914 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2915 vreinterpretq_p16_u32 (uint32x4_t __a)
2917 return (poly16x8_t) __a;
2920 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2921 vreinterpretq_p16_u64 (uint64x2_t __a)
2923 return (poly16x8_t) __a;
2926 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2927 vreinterpretq_p16_p8 (poly8x16_t __a)
2929 return (poly16x8_t) __a;
2932 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2933 vreinterpret_f32_f64 (float64x1_t __a)
2935 return __builtin_aarch64_reinterpretv2sfv1df (__a);
2938 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2939 vreinterpret_f32_s8 (int8x8_t __a)
2941 return (float32x2_t) __a;
2944 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2945 vreinterpret_f32_s16 (int16x4_t __a)
2947 return (float32x2_t) __a;
2950 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2951 vreinterpret_f32_s32 (int32x2_t __a)
2953 return (float32x2_t) __a;
2956 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2957 vreinterpret_f32_s64 (int64x1_t __a)
2959 return (float32x2_t) __a;
2962 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2963 vreinterpret_f32_u8 (uint8x8_t __a)
2965 return (float32x2_t) __a;
2968 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2969 vreinterpret_f32_u16 (uint16x4_t __a)
2971 return (float32x2_t) __a;
2974 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2975 vreinterpret_f32_u32 (uint32x2_t __a)
2977 return (float32x2_t) __a;
2980 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2981 vreinterpret_f32_u64 (uint64x1_t __a)
2983 return (float32x2_t) __a;
2986 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2987 vreinterpret_f32_p8 (poly8x8_t __a)
2989 return (float32x2_t) __a;
2992 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2993 vreinterpret_f32_p16 (poly16x4_t __a)
2995 return (float32x2_t) __a;
2998 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2999 vreinterpretq_f32_f64 (float64x2_t __a)
3001 return (float32x4_t) __a;
3004 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3005 vreinterpretq_f32_s8 (int8x16_t __a)
3007 return (float32x4_t) __a;
3010 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3011 vreinterpretq_f32_s16 (int16x8_t __a)
3013 return (float32x4_t) __a;
3016 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3017 vreinterpretq_f32_s32 (int32x4_t __a)
3019 return (float32x4_t) __a;
3022 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3023 vreinterpretq_f32_s64 (int64x2_t __a)
3025 return (float32x4_t) __a;
3028 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3029 vreinterpretq_f32_u8 (uint8x16_t __a)
3031 return (float32x4_t) __a;
3034 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3035 vreinterpretq_f32_u16 (uint16x8_t __a)
3037 return (float32x4_t) __a;
3040 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3041 vreinterpretq_f32_u32 (uint32x4_t __a)
3043 return (float32x4_t) __a;
3046 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3047 vreinterpretq_f32_u64 (uint64x2_t __a)
3049 return (float32x4_t) __a;
3052 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3053 vreinterpretq_f32_p8 (poly8x16_t __a)
3055 return (float32x4_t) __a;
3058 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3059 vreinterpretq_f32_p16 (poly16x8_t __a)
3061 return (float32x4_t) __a;
3064 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3065 vreinterpret_f64_f32 (float32x2_t __a)
3067 return __builtin_aarch64_reinterpretv1dfv2sf (__a);
3070 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3071 vreinterpret_f64_p8 (poly8x8_t __a)
3073 return __builtin_aarch64_reinterpretv1dfv8qi_sp (__a);
3076 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3077 vreinterpret_f64_p16 (poly16x4_t __a)
3079 return __builtin_aarch64_reinterpretv1dfv4hi_sp (__a);
3082 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3083 vreinterpret_f64_s8 (int8x8_t __a)
3085 return __builtin_aarch64_reinterpretv1dfv8qi (__a);
3088 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3089 vreinterpret_f64_s16 (int16x4_t __a)
3091 return __builtin_aarch64_reinterpretv1dfv4hi (__a);
3094 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3095 vreinterpret_f64_s32 (int32x2_t __a)
3097 return __builtin_aarch64_reinterpretv1dfv2si (__a);
3100 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3101 vreinterpret_f64_s64 (int64x1_t __a)
3103 return __builtin_aarch64_createv1df ((uint64_t) vget_lane_s64 (__a, 0));
3106 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3107 vreinterpret_f64_u8 (uint8x8_t __a)
3109 return __builtin_aarch64_reinterpretv1dfv8qi_su (__a);
3112 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3113 vreinterpret_f64_u16 (uint16x4_t __a)
3115 return __builtin_aarch64_reinterpretv1dfv4hi_su (__a);
3118 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3119 vreinterpret_f64_u32 (uint32x2_t __a)
3121 return __builtin_aarch64_reinterpretv1dfv2si_su (__a);
3124 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3125 vreinterpret_f64_u64 (uint64x1_t __a)
3127 return __builtin_aarch64_createv1df (vget_lane_u64 (__a, 0));
3130 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3131 vreinterpretq_f64_f32 (float32x4_t __a)
3133 return (float64x2_t) __a;
3136 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3137 vreinterpretq_f64_p8 (poly8x16_t __a)
3139 return (float64x2_t) __a;
3142 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3143 vreinterpretq_f64_p16 (poly16x8_t __a)
3145 return (float64x2_t) __a;
3148 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3149 vreinterpretq_f64_s8 (int8x16_t __a)
3151 return (float64x2_t) __a;
3154 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3155 vreinterpretq_f64_s16 (int16x8_t __a)
3157 return (float64x2_t) __a;
3160 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3161 vreinterpretq_f64_s32 (int32x4_t __a)
3163 return (float64x2_t) __a;
3166 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3167 vreinterpretq_f64_s64 (int64x2_t __a)
3169 return (float64x2_t) __a;
3172 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3173 vreinterpretq_f64_u8 (uint8x16_t __a)
3175 return (float64x2_t) __a;
3178 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3179 vreinterpretq_f64_u16 (uint16x8_t __a)
3181 return (float64x2_t) __a;
3184 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3185 vreinterpretq_f64_u32 (uint32x4_t __a)
3187 return (float64x2_t) __a;
3190 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3191 vreinterpretq_f64_u64 (uint64x2_t __a)
3193 return (float64x2_t) __a;
3196 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3197 vreinterpret_s64_f64 (float64x1_t __a)
3199 return (int64x1_t) {__builtin_aarch64_reinterpretdiv1df (__a)};
3202 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3203 vreinterpret_s64_s8 (int8x8_t __a)
3205 return (int64x1_t) __a;
3208 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3209 vreinterpret_s64_s16 (int16x4_t __a)
3211 return (int64x1_t) __a;
3214 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3215 vreinterpret_s64_s32 (int32x2_t __a)
3217 return (int64x1_t) __a;
3220 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3221 vreinterpret_s64_f32 (float32x2_t __a)
3223 return (int64x1_t) __a;
3226 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3227 vreinterpret_s64_u8 (uint8x8_t __a)
3229 return (int64x1_t) __a;
3232 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3233 vreinterpret_s64_u16 (uint16x4_t __a)
3235 return (int64x1_t) __a;
3238 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3239 vreinterpret_s64_u32 (uint32x2_t __a)
3241 return (int64x1_t) __a;
3244 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3245 vreinterpret_s64_u64 (uint64x1_t __a)
3247 return (int64x1_t) __a;
3250 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3251 vreinterpret_s64_p8 (poly8x8_t __a)
3253 return (int64x1_t) __a;
3256 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3257 vreinterpret_s64_p16 (poly16x4_t __a)
3259 return (int64x1_t) __a;
3262 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3263 vreinterpretq_s64_f64 (float64x2_t __a)
3265 return (int64x2_t) __a;
3268 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3269 vreinterpretq_s64_s8 (int8x16_t __a)
3271 return (int64x2_t) __a;
3274 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3275 vreinterpretq_s64_s16 (int16x8_t __a)
3277 return (int64x2_t) __a;
3280 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3281 vreinterpretq_s64_s32 (int32x4_t __a)
3283 return (int64x2_t) __a;
3286 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3287 vreinterpretq_s64_f32 (float32x4_t __a)
3289 return (int64x2_t) __a;
3292 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3293 vreinterpretq_s64_u8 (uint8x16_t __a)
3295 return (int64x2_t) __a;
3298 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3299 vreinterpretq_s64_u16 (uint16x8_t __a)
3301 return (int64x2_t) __a;
3304 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3305 vreinterpretq_s64_u32 (uint32x4_t __a)
3307 return (int64x2_t) __a;
3310 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3311 vreinterpretq_s64_u64 (uint64x2_t __a)
3313 return (int64x2_t) __a;
3316 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3317 vreinterpretq_s64_p8 (poly8x16_t __a)
3319 return (int64x2_t) __a;
3322 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3323 vreinterpretq_s64_p16 (poly16x8_t __a)
3325 return (int64x2_t) __a;
3328 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3329 vreinterpret_u64_f64 (float64x1_t __a)
3331 return (uint64x1_t) {__builtin_aarch64_reinterpretdiv1df_us (__a)};
3334 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3335 vreinterpret_u64_s8 (int8x8_t __a)
3337 return (uint64x1_t) __a;
3340 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3341 vreinterpret_u64_s16 (int16x4_t __a)
3343 return (uint64x1_t) __a;
3346 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3347 vreinterpret_u64_s32 (int32x2_t __a)
3349 return (uint64x1_t) __a;
3352 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3353 vreinterpret_u64_s64 (int64x1_t __a)
3355 return (uint64x1_t) __a;
3358 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3359 vreinterpret_u64_f32 (float32x2_t __a)
3361 return (uint64x1_t) __a;
3364 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3365 vreinterpret_u64_u8 (uint8x8_t __a)
3367 return (uint64x1_t) __a;
3370 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3371 vreinterpret_u64_u16 (uint16x4_t __a)
3373 return (uint64x1_t) __a;
3376 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3377 vreinterpret_u64_u32 (uint32x2_t __a)
3379 return (uint64x1_t) __a;
3382 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3383 vreinterpret_u64_p8 (poly8x8_t __a)
3385 return (uint64x1_t) __a;
3388 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3389 vreinterpret_u64_p16 (poly16x4_t __a)
3391 return (uint64x1_t) __a;
3394 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3395 vreinterpretq_u64_f64 (float64x2_t __a)
3397 return (uint64x2_t) __a;
3400 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3401 vreinterpretq_u64_s8 (int8x16_t __a)
3403 return (uint64x2_t) __a;
3406 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3407 vreinterpretq_u64_s16 (int16x8_t __a)
3409 return (uint64x2_t) __a;
3412 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3413 vreinterpretq_u64_s32 (int32x4_t __a)
3415 return (uint64x2_t) __a;
3418 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3419 vreinterpretq_u64_s64 (int64x2_t __a)
3421 return (uint64x2_t) __a;
3424 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3425 vreinterpretq_u64_f32 (float32x4_t __a)
3427 return (uint64x2_t) __a;
3430 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3431 vreinterpretq_u64_u8 (uint8x16_t __a)
3433 return (uint64x2_t) __a;
3436 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3437 vreinterpretq_u64_u16 (uint16x8_t __a)
3439 return (uint64x2_t) __a;
3442 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3443 vreinterpretq_u64_u32 (uint32x4_t __a)
3445 return (uint64x2_t) __a;
3448 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3449 vreinterpretq_u64_p8 (poly8x16_t __a)
3451 return (uint64x2_t) __a;
3454 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3455 vreinterpretq_u64_p16 (poly16x8_t __a)
3457 return (uint64x2_t) __a;
3460 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3461 vreinterpret_s8_f64 (float64x1_t __a)
3463 return __builtin_aarch64_reinterpretv8qiv1df (__a);
3466 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3467 vreinterpret_s8_s16 (int16x4_t __a)
3469 return (int8x8_t) __a;
3472 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3473 vreinterpret_s8_s32 (int32x2_t __a)
3475 return (int8x8_t) __a;
3478 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3479 vreinterpret_s8_s64 (int64x1_t __a)
3481 return (int8x8_t) __a;
3484 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3485 vreinterpret_s8_f32 (float32x2_t __a)
3487 return (int8x8_t) __a;
3490 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3491 vreinterpret_s8_u8 (uint8x8_t __a)
3493 return (int8x8_t) __a;
3496 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3497 vreinterpret_s8_u16 (uint16x4_t __a)
3499 return (int8x8_t) __a;
3502 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3503 vreinterpret_s8_u32 (uint32x2_t __a)
3505 return (int8x8_t) __a;
3508 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3509 vreinterpret_s8_u64 (uint64x1_t __a)
3511 return (int8x8_t) __a;
3514 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3515 vreinterpret_s8_p8 (poly8x8_t __a)
3517 return (int8x8_t) __a;
3520 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3521 vreinterpret_s8_p16 (poly16x4_t __a)
3523 return (int8x8_t) __a;
3526 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3527 vreinterpretq_s8_f64 (float64x2_t __a)
3529 return (int8x16_t) __a;
3532 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3533 vreinterpretq_s8_s16 (int16x8_t __a)
3535 return (int8x16_t) __a;
3538 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3539 vreinterpretq_s8_s32 (int32x4_t __a)
3541 return (int8x16_t) __a;
3544 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3545 vreinterpretq_s8_s64 (int64x2_t __a)
3547 return (int8x16_t) __a;
3550 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3551 vreinterpretq_s8_f32 (float32x4_t __a)
3553 return (int8x16_t) __a;
3556 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3557 vreinterpretq_s8_u8 (uint8x16_t __a)
3559 return (int8x16_t) __a;
3562 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3563 vreinterpretq_s8_u16 (uint16x8_t __a)
3565 return (int8x16_t) __a;
3568 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3569 vreinterpretq_s8_u32 (uint32x4_t __a)
3571 return (int8x16_t) __a;
3574 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3575 vreinterpretq_s8_u64 (uint64x2_t __a)
3577 return (int8x16_t) __a;
3580 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3581 vreinterpretq_s8_p8 (poly8x16_t __a)
3583 return (int8x16_t) __a;
3586 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3587 vreinterpretq_s8_p16 (poly16x8_t __a)
3589 return (int8x16_t) __a;
3592 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3593 vreinterpret_s16_f64 (float64x1_t __a)
3595 return __builtin_aarch64_reinterpretv4hiv1df (__a);
3598 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3599 vreinterpret_s16_s8 (int8x8_t __a)
3601 return (int16x4_t) __a;
3604 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3605 vreinterpret_s16_s32 (int32x2_t __a)
3607 return (int16x4_t) __a;
3610 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3611 vreinterpret_s16_s64 (int64x1_t __a)
3613 return (int16x4_t) __a;
3616 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3617 vreinterpret_s16_f32 (float32x2_t __a)
3619 return (int16x4_t) __a;
3622 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3623 vreinterpret_s16_u8 (uint8x8_t __a)
3625 return (int16x4_t) __a;
3628 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3629 vreinterpret_s16_u16 (uint16x4_t __a)
3631 return (int16x4_t) __a;
3634 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3635 vreinterpret_s16_u32 (uint32x2_t __a)
3637 return (int16x4_t) __a;
3640 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3641 vreinterpret_s16_u64 (uint64x1_t __a)
3643 return (int16x4_t) __a;
3646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3647 vreinterpret_s16_p8 (poly8x8_t __a)
3649 return (int16x4_t) __a;
3652 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3653 vreinterpret_s16_p16 (poly16x4_t __a)
3655 return (int16x4_t) __a;
3658 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3659 vreinterpretq_s16_f64 (float64x2_t __a)
3661 return (int16x8_t) __a;
3664 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3665 vreinterpretq_s16_s8 (int8x16_t __a)
3667 return (int16x8_t) __a;
3670 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3671 vreinterpretq_s16_s32 (int32x4_t __a)
3673 return (int16x8_t) __a;
3676 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3677 vreinterpretq_s16_s64 (int64x2_t __a)
3679 return (int16x8_t) __a;
3682 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3683 vreinterpretq_s16_f32 (float32x4_t __a)
3685 return (int16x8_t) __a;
3688 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3689 vreinterpretq_s16_u8 (uint8x16_t __a)
3691 return (int16x8_t) __a;
3694 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3695 vreinterpretq_s16_u16 (uint16x8_t __a)
3697 return (int16x8_t) __a;
3700 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3701 vreinterpretq_s16_u32 (uint32x4_t __a)
3703 return (int16x8_t) __a;
3706 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3707 vreinterpretq_s16_u64 (uint64x2_t __a)
3709 return (int16x8_t) __a;
3712 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3713 vreinterpretq_s16_p8 (poly8x16_t __a)
3715 return (int16x8_t) __a;
3718 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3719 vreinterpretq_s16_p16 (poly16x8_t __a)
3721 return (int16x8_t) __a;
3724 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3725 vreinterpret_s32_f64 (float64x1_t __a)
3727 return __builtin_aarch64_reinterpretv2siv1df (__a);
3730 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3731 vreinterpret_s32_s8 (int8x8_t __a)
3733 return (int32x2_t) __a;
3736 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3737 vreinterpret_s32_s16 (int16x4_t __a)
3739 return (int32x2_t) __a;
3742 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3743 vreinterpret_s32_s64 (int64x1_t __a)
3745 return (int32x2_t) __a;
3748 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3749 vreinterpret_s32_f32 (float32x2_t __a)
3751 return (int32x2_t) __a;
3754 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3755 vreinterpret_s32_u8 (uint8x8_t __a)
3757 return (int32x2_t) __a;
3760 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3761 vreinterpret_s32_u16 (uint16x4_t __a)
3763 return (int32x2_t) __a;
3766 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3767 vreinterpret_s32_u32 (uint32x2_t __a)
3769 return (int32x2_t) __a;
3772 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3773 vreinterpret_s32_u64 (uint64x1_t __a)
3775 return (int32x2_t) __a;
3778 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3779 vreinterpret_s32_p8 (poly8x8_t __a)
3781 return (int32x2_t) __a;
3784 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3785 vreinterpret_s32_p16 (poly16x4_t __a)
3787 return (int32x2_t) __a;
3790 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3791 vreinterpretq_s32_f64 (float64x2_t __a)
3793 return (int32x4_t) __a;
3796 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3797 vreinterpretq_s32_s8 (int8x16_t __a)
3799 return (int32x4_t) __a;
3802 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3803 vreinterpretq_s32_s16 (int16x8_t __a)
3805 return (int32x4_t) __a;
3808 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3809 vreinterpretq_s32_s64 (int64x2_t __a)
3811 return (int32x4_t) __a;
3814 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3815 vreinterpretq_s32_f32 (float32x4_t __a)
3817 return (int32x4_t) __a;
3820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3821 vreinterpretq_s32_u8 (uint8x16_t __a)
3823 return (int32x4_t) __a;
3826 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3827 vreinterpretq_s32_u16 (uint16x8_t __a)
3829 return (int32x4_t) __a;
3832 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3833 vreinterpretq_s32_u32 (uint32x4_t __a)
3835 return (int32x4_t) __a;
3838 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3839 vreinterpretq_s32_u64 (uint64x2_t __a)
3841 return (int32x4_t) __a;
3844 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3845 vreinterpretq_s32_p8 (poly8x16_t __a)
3847 return (int32x4_t) __a;
3850 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3851 vreinterpretq_s32_p16 (poly16x8_t __a)
3853 return (int32x4_t) __a;
3856 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3857 vreinterpret_u8_f64 (float64x1_t __a)
3859 return __builtin_aarch64_reinterpretv8qiv1df_us (__a);
3862 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3863 vreinterpret_u8_s8 (int8x8_t __a)
3865 return (uint8x8_t) __a;
3868 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3869 vreinterpret_u8_s16 (int16x4_t __a)
3871 return (uint8x8_t) __a;
3874 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3875 vreinterpret_u8_s32 (int32x2_t __a)
3877 return (uint8x8_t) __a;
3880 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3881 vreinterpret_u8_s64 (int64x1_t __a)
3883 return (uint8x8_t) __a;
3886 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3887 vreinterpret_u8_f32 (float32x2_t __a)
3889 return (uint8x8_t) __a;
3892 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3893 vreinterpret_u8_u16 (uint16x4_t __a)
3895 return (uint8x8_t) __a;
3898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3899 vreinterpret_u8_u32 (uint32x2_t __a)
3901 return (uint8x8_t) __a;
3904 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3905 vreinterpret_u8_u64 (uint64x1_t __a)
3907 return (uint8x8_t) __a;
3910 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3911 vreinterpret_u8_p8 (poly8x8_t __a)
3913 return (uint8x8_t) __a;
3916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3917 vreinterpret_u8_p16 (poly16x4_t __a)
3919 return (uint8x8_t) __a;
3922 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3923 vreinterpretq_u8_f64 (float64x2_t __a)
3925 return (uint8x16_t) __a;
3928 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3929 vreinterpretq_u8_s8 (int8x16_t __a)
3931 return (uint8x16_t) __a;
3934 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3935 vreinterpretq_u8_s16 (int16x8_t __a)
3937 return (uint8x16_t) __a;
3940 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3941 vreinterpretq_u8_s32 (int32x4_t __a)
3943 return (uint8x16_t) __a;
3946 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3947 vreinterpretq_u8_s64 (int64x2_t __a)
3949 return (uint8x16_t) __a;
3952 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3953 vreinterpretq_u8_f32 (float32x4_t __a)
3955 return (uint8x16_t) __a;
3958 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3959 vreinterpretq_u8_u16 (uint16x8_t __a)
3961 return (uint8x16_t) __a;
3964 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3965 vreinterpretq_u8_u32 (uint32x4_t __a)
3967 return (uint8x16_t) __a;
3970 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3971 vreinterpretq_u8_u64 (uint64x2_t __a)
3973 return (uint8x16_t) __a;
3976 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3977 vreinterpretq_u8_p8 (poly8x16_t __a)
3979 return (uint8x16_t) __a;
3982 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3983 vreinterpretq_u8_p16 (poly16x8_t __a)
3985 return (uint8x16_t) __a;
3988 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3989 vreinterpret_u16_f64 (float64x1_t __a)
3991 return __builtin_aarch64_reinterpretv4hiv1df_us (__a);
3994 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3995 vreinterpret_u16_s8 (int8x8_t __a)
3997 return (uint16x4_t) __a;
4000 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4001 vreinterpret_u16_s16 (int16x4_t __a)
4003 return (uint16x4_t) __a;
4006 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4007 vreinterpret_u16_s32 (int32x2_t __a)
4009 return (uint16x4_t) __a;
4012 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4013 vreinterpret_u16_s64 (int64x1_t __a)
4015 return (uint16x4_t) __a;
4018 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4019 vreinterpret_u16_f32 (float32x2_t __a)
4021 return (uint16x4_t) __a;
4024 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4025 vreinterpret_u16_u8 (uint8x8_t __a)
4027 return (uint16x4_t) __a;
4030 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4031 vreinterpret_u16_u32 (uint32x2_t __a)
4033 return (uint16x4_t) __a;
4036 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4037 vreinterpret_u16_u64 (uint64x1_t __a)
4039 return (uint16x4_t) __a;
4042 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4043 vreinterpret_u16_p8 (poly8x8_t __a)
4045 return (uint16x4_t) __a;
4048 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4049 vreinterpret_u16_p16 (poly16x4_t __a)
4051 return (uint16x4_t) __a;
4054 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4055 vreinterpretq_u16_f64 (float64x2_t __a)
4057 return (uint16x8_t) __a;
4060 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4061 vreinterpretq_u16_s8 (int8x16_t __a)
4063 return (uint16x8_t) __a;
4066 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4067 vreinterpretq_u16_s16 (int16x8_t __a)
4069 return (uint16x8_t) __a;
4072 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4073 vreinterpretq_u16_s32 (int32x4_t __a)
4075 return (uint16x8_t) __a;
4078 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4079 vreinterpretq_u16_s64 (int64x2_t __a)
4081 return (uint16x8_t) __a;
4084 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4085 vreinterpretq_u16_f32 (float32x4_t __a)
4087 return (uint16x8_t) __a;
4090 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4091 vreinterpretq_u16_u8 (uint8x16_t __a)
4093 return (uint16x8_t) __a;
4096 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4097 vreinterpretq_u16_u32 (uint32x4_t __a)
4099 return (uint16x8_t) __a;
4102 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4103 vreinterpretq_u16_u64 (uint64x2_t __a)
4105 return (uint16x8_t) __a;
4108 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4109 vreinterpretq_u16_p8 (poly8x16_t __a)
4111 return (uint16x8_t) __a;
4114 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4115 vreinterpretq_u16_p16 (poly16x8_t __a)
4117 return (uint16x8_t) __a;
4120 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4121 vreinterpret_u32_f64 (float64x1_t __a)
4123 return __builtin_aarch64_reinterpretv2siv1df_us (__a);
4126 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4127 vreinterpret_u32_s8 (int8x8_t __a)
4129 return (uint32x2_t) __a;
4132 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4133 vreinterpret_u32_s16 (int16x4_t __a)
4135 return (uint32x2_t) __a;
4138 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4139 vreinterpret_u32_s32 (int32x2_t __a)
4141 return (uint32x2_t) __a;
4144 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4145 vreinterpret_u32_s64 (int64x1_t __a)
4147 return (uint32x2_t) __a;
4150 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4151 vreinterpret_u32_f32 (float32x2_t __a)
4153 return (uint32x2_t) __a;
4156 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4157 vreinterpret_u32_u8 (uint8x8_t __a)
4159 return (uint32x2_t) __a;
4162 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4163 vreinterpret_u32_u16 (uint16x4_t __a)
4165 return (uint32x2_t) __a;
4168 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4169 vreinterpret_u32_u64 (uint64x1_t __a)
4171 return (uint32x2_t) __a;
4174 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4175 vreinterpret_u32_p8 (poly8x8_t __a)
4177 return (uint32x2_t) __a;
4180 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4181 vreinterpret_u32_p16 (poly16x4_t __a)
4183 return (uint32x2_t) __a;
4186 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4187 vreinterpretq_u32_f64 (float64x2_t __a)
4189 return (uint32x4_t) __a;
4192 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4193 vreinterpretq_u32_s8 (int8x16_t __a)
4195 return (uint32x4_t) __a;
4198 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4199 vreinterpretq_u32_s16 (int16x8_t __a)
4201 return (uint32x4_t) __a;
4204 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4205 vreinterpretq_u32_s32 (int32x4_t __a)
4207 return (uint32x4_t) __a;
4210 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4211 vreinterpretq_u32_s64 (int64x2_t __a)
4213 return (uint32x4_t) __a;
4216 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4217 vreinterpretq_u32_f32 (float32x4_t __a)
4219 return (uint32x4_t) __a;
4222 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4223 vreinterpretq_u32_u8 (uint8x16_t __a)
4225 return (uint32x4_t) __a;
4228 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4229 vreinterpretq_u32_u16 (uint16x8_t __a)
4231 return (uint32x4_t) __a;
4234 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4235 vreinterpretq_u32_u64 (uint64x2_t __a)
4237 return (uint32x4_t) __a;
4240 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4241 vreinterpretq_u32_p8 (poly8x16_t __a)
4243 return (uint32x4_t) __a;
4246 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4247 vreinterpretq_u32_p16 (poly16x8_t __a)
4249 return (uint32x4_t) __a;
4252 #define __GET_LOW(__TYPE) \
4253 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
4254 uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \
4255 return vreinterpret_##__TYPE##_u64 (lo);
4257 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4258 vget_low_f32 (float32x4_t __a)
4260 __GET_LOW (f32);
4263 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4264 vget_low_f64 (float64x2_t __a)
4266 return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
4269 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4270 vget_low_p8 (poly8x16_t __a)
4272 __GET_LOW (p8);
4275 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4276 vget_low_p16 (poly16x8_t __a)
4278 __GET_LOW (p16);
4281 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4282 vget_low_s8 (int8x16_t __a)
4284 __GET_LOW (s8);
4287 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4288 vget_low_s16 (int16x8_t __a)
4290 __GET_LOW (s16);
4293 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4294 vget_low_s32 (int32x4_t __a)
4296 __GET_LOW (s32);
4299 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4300 vget_low_s64 (int64x2_t __a)
4302 __GET_LOW (s64);
4305 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4306 vget_low_u8 (uint8x16_t __a)
4308 __GET_LOW (u8);
4311 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4312 vget_low_u16 (uint16x8_t __a)
4314 __GET_LOW (u16);
4317 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4318 vget_low_u32 (uint32x4_t __a)
4320 __GET_LOW (u32);
4323 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4324 vget_low_u64 (uint64x2_t __a)
4326 return vcreate_u64 (vgetq_lane_u64 (__a, 0));
4329 #undef __GET_LOW
4331 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4332 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4334 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4337 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4338 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4340 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4343 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4344 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4346 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4349 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4350 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4352 return __builtin_aarch64_combinedi (__a[0], __b[0]);
4355 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4356 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4358 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4361 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4362 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4364 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4365 (int8x8_t) __b);
4368 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4369 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4371 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4372 (int16x4_t) __b);
4375 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4376 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4378 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4379 (int32x2_t) __b);
4382 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4383 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4385 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
4388 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4389 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4391 return __builtin_aarch64_combinedf (__a[0], __b[0]);
4394 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4395 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4397 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4398 (int8x8_t) __b);
4401 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4402 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4404 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4405 (int16x4_t) __b);
/* Start of temporary inline asm implementations.  */
4410 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4411 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4413 int8x8_t result;
4414 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4415 : "=w"(result)
4416 : "0"(a), "w"(b), "w"(c)
4417 : /* No clobbers */);
4418 return result;
4421 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4422 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4424 int16x4_t result;
4425 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4426 : "=w"(result)
4427 : "0"(a), "w"(b), "w"(c)
4428 : /* No clobbers */);
4429 return result;
4432 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4433 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4435 int32x2_t result;
4436 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4437 : "=w"(result)
4438 : "0"(a), "w"(b), "w"(c)
4439 : /* No clobbers */);
4440 return result;
4443 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4444 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4446 uint8x8_t result;
4447 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4448 : "=w"(result)
4449 : "0"(a), "w"(b), "w"(c)
4450 : /* No clobbers */);
4451 return result;
4454 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4455 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4457 uint16x4_t result;
4458 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4459 : "=w"(result)
4460 : "0"(a), "w"(b), "w"(c)
4461 : /* No clobbers */);
4462 return result;
4465 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4466 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4468 uint32x2_t result;
4469 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4470 : "=w"(result)
4471 : "0"(a), "w"(b), "w"(c)
4472 : /* No clobbers */);
4473 return result;
4476 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4477 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4479 int16x8_t result;
4480 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4481 : "=w"(result)
4482 : "0"(a), "w"(b), "w"(c)
4483 : /* No clobbers */);
4484 return result;
4487 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4488 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4490 int32x4_t result;
4491 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4492 : "=w"(result)
4493 : "0"(a), "w"(b), "w"(c)
4494 : /* No clobbers */);
4495 return result;
4498 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4499 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4501 int64x2_t result;
4502 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4503 : "=w"(result)
4504 : "0"(a), "w"(b), "w"(c)
4505 : /* No clobbers */);
4506 return result;
4509 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4510 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4512 uint16x8_t result;
4513 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4514 : "=w"(result)
4515 : "0"(a), "w"(b), "w"(c)
4516 : /* No clobbers */);
4517 return result;
4520 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4521 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4523 uint32x4_t result;
4524 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4525 : "=w"(result)
4526 : "0"(a), "w"(b), "w"(c)
4527 : /* No clobbers */);
4528 return result;
4531 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4532 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4534 uint64x2_t result;
4535 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4536 : "=w"(result)
4537 : "0"(a), "w"(b), "w"(c)
4538 : /* No clobbers */);
4539 return result;
4542 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4543 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4545 int16x8_t result;
4546 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4547 : "=w"(result)
4548 : "0"(a), "w"(b), "w"(c)
4549 : /* No clobbers */);
4550 return result;
4553 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4554 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4556 int32x4_t result;
4557 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4558 : "=w"(result)
4559 : "0"(a), "w"(b), "w"(c)
4560 : /* No clobbers */);
4561 return result;
4564 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4565 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4567 int64x2_t result;
4568 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4569 : "=w"(result)
4570 : "0"(a), "w"(b), "w"(c)
4571 : /* No clobbers */);
4572 return result;
4575 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4576 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4578 uint16x8_t result;
4579 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4580 : "=w"(result)
4581 : "0"(a), "w"(b), "w"(c)
4582 : /* No clobbers */);
4583 return result;
4586 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4587 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4589 uint32x4_t result;
4590 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4591 : "=w"(result)
4592 : "0"(a), "w"(b), "w"(c)
4593 : /* No clobbers */);
4594 return result;
4597 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4598 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4600 uint64x2_t result;
4601 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4602 : "=w"(result)
4603 : "0"(a), "w"(b), "w"(c)
4604 : /* No clobbers */);
4605 return result;
/* Quad-register absolute-difference-accumulate (SABA/UABA):
   each lane of the result is a + |b - c|, same element width
   throughout (no widening).  The variants below differ only in
   element type/width, encoded in the _sN/_uN suffix; "0"(a) ties the
   accumulator to the output register.  */
4608 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4609 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4611 int8x16_t result;
4612 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4613 : "=w"(result)
4614 : "0"(a), "w"(b), "w"(c)
4615 : /* No clobbers */);
4616 return result;
4619 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4620 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4622 int16x8_t result;
4623 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4624 : "=w"(result)
4625 : "0"(a), "w"(b), "w"(c)
4626 : /* No clobbers */);
4627 return result;
4630 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4631 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4633 int32x4_t result;
4634 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4635 : "=w"(result)
4636 : "0"(a), "w"(b), "w"(c)
4637 : /* No clobbers */);
4638 return result;
/* Unsigned forms use UABA; structure is otherwise identical.  */
4641 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4642 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4644 uint8x16_t result;
4645 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4646 : "=w"(result)
4647 : "0"(a), "w"(b), "w"(c)
4648 : /* No clobbers */);
4649 return result;
4652 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4653 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4655 uint16x8_t result;
4656 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4657 : "=w"(result)
4658 : "0"(a), "w"(b), "w"(c)
4659 : /* No clobbers */);
4660 return result;
4663 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4664 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4666 uint32x4_t result;
4667 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4668 : "=w"(result)
4669 : "0"(a), "w"(b), "w"(c)
4670 : /* No clobbers */);
4671 return result;
/* Absolute difference, 64-bit vectors: each result lane is |a - b|
   (FABD for float, SABD/UABD for signed/unsigned integers).  Both
   inputs are plain "w" operands; nothing is accumulated.  */
4674 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4675 vabd_f32 (float32x2_t a, float32x2_t b)
4677 float32x2_t result;
4678 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4679 : "=w"(result)
4680 : "w"(a), "w"(b)
4681 : /* No clobbers */);
4682 return result;
4685 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4686 vabd_s8 (int8x8_t a, int8x8_t b)
4688 int8x8_t result;
4689 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4690 : "=w"(result)
4691 : "w"(a), "w"(b)
4692 : /* No clobbers */);
4693 return result;
4696 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4697 vabd_s16 (int16x4_t a, int16x4_t b)
4699 int16x4_t result;
4700 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4701 : "=w"(result)
4702 : "w"(a), "w"(b)
4703 : /* No clobbers */);
4704 return result;
4707 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4708 vabd_s32 (int32x2_t a, int32x2_t b)
4710 int32x2_t result;
4711 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4712 : "=w"(result)
4713 : "w"(a), "w"(b)
4714 : /* No clobbers */);
4715 return result;
4718 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4719 vabd_u8 (uint8x8_t a, uint8x8_t b)
4721 uint8x8_t result;
4722 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4723 : "=w"(result)
4724 : "w"(a), "w"(b)
4725 : /* No clobbers */);
4726 return result;
4729 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4730 vabd_u16 (uint16x4_t a, uint16x4_t b)
4732 uint16x4_t result;
4733 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4734 : "=w"(result)
4735 : "w"(a), "w"(b)
4736 : /* No clobbers */);
4737 return result;
4740 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4741 vabd_u32 (uint32x2_t a, uint32x2_t b)
4743 uint32x2_t result;
4744 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4745 : "=w"(result)
4746 : "w"(a), "w"(b)
4747 : /* No clobbers */);
4748 return result;
/* Scalar double-precision |a - b|; %d0 selects the D (64-bit)
   view of the FP/SIMD register.  */
4751 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4752 vabdd_f64 (float64_t a, float64_t b)
4754 float64_t result;
4755 __asm__ ("fabd %d0, %d1, %d2"
4756 : "=w"(result)
4757 : "w"(a), "w"(b)
4758 : /* No clobbers */);
4759 return result;
/* Widening absolute difference of the HIGH halves (SABDL2/UABDL2):
   takes the upper 64 bits of each 128-bit input and produces
   |a - b| widened to the next element size.  */
4762 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4763 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4765 int16x8_t result;
4766 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4767 : "=w"(result)
4768 : "w"(a), "w"(b)
4769 : /* No clobbers */);
4770 return result;
4773 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4774 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4776 int32x4_t result;
4777 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4778 : "=w"(result)
4779 : "w"(a), "w"(b)
4780 : /* No clobbers */);
4781 return result;
4784 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4785 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4787 int64x2_t result;
4788 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4789 : "=w"(result)
4790 : "w"(a), "w"(b)
4791 : /* No clobbers */);
4792 return result;
4795 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4796 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4798 uint16x8_t result;
4799 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4800 : "=w"(result)
4801 : "w"(a), "w"(b)
4802 : /* No clobbers */);
4803 return result;
4806 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4807 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4809 uint32x4_t result;
4810 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4811 : "=w"(result)
4812 : "w"(a), "w"(b)
4813 : /* No clobbers */);
4814 return result;
4817 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4818 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4820 uint64x2_t result;
4821 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4822 : "=w"(result)
4823 : "w"(a), "w"(b)
4824 : /* No clobbers */);
4825 return result;
/* Widening absolute difference (SABDL/UABDL): |a - b| of 64-bit
   vector inputs, each lane widened to the next element size in a
   128-bit result.  */
4828 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4829 vabdl_s8 (int8x8_t a, int8x8_t b)
4831 int16x8_t result;
4832 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4833 : "=w"(result)
4834 : "w"(a), "w"(b)
4835 : /* No clobbers */);
4836 return result;
4839 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4840 vabdl_s16 (int16x4_t a, int16x4_t b)
4842 int32x4_t result;
4843 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4844 : "=w"(result)
4845 : "w"(a), "w"(b)
4846 : /* No clobbers */);
4847 return result;
4850 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4851 vabdl_s32 (int32x2_t a, int32x2_t b)
4853 int64x2_t result;
4854 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4855 : "=w"(result)
4856 : "w"(a), "w"(b)
4857 : /* No clobbers */);
4858 return result;
4861 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4862 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4864 uint16x8_t result;
4865 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4866 : "=w"(result)
4867 : "w"(a), "w"(b)
4868 : /* No clobbers */);
4869 return result;
4872 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4873 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4875 uint32x4_t result;
4876 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4877 : "=w"(result)
4878 : "w"(a), "w"(b)
4879 : /* No clobbers */);
4880 return result;
4883 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4884 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4886 uint64x2_t result;
4887 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4888 : "=w"(result)
4889 : "w"(a), "w"(b)
4890 : /* No clobbers */);
4891 return result;
/* Absolute difference, 128-bit vectors: per-lane |a - b|
   (FABD / SABD / UABD), no widening.  */
4894 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4895 vabdq_f32 (float32x4_t a, float32x4_t b)
4897 float32x4_t result;
4898 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4899 : "=w"(result)
4900 : "w"(a), "w"(b)
4901 : /* No clobbers */);
4902 return result;
4905 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4906 vabdq_f64 (float64x2_t a, float64x2_t b)
4908 float64x2_t result;
4909 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4910 : "=w"(result)
4911 : "w"(a), "w"(b)
4912 : /* No clobbers */);
4913 return result;
4916 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4917 vabdq_s8 (int8x16_t a, int8x16_t b)
4919 int8x16_t result;
4920 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4921 : "=w"(result)
4922 : "w"(a), "w"(b)
4923 : /* No clobbers */);
4924 return result;
4927 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4928 vabdq_s16 (int16x8_t a, int16x8_t b)
4930 int16x8_t result;
4931 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4932 : "=w"(result)
4933 : "w"(a), "w"(b)
4934 : /* No clobbers */);
4935 return result;
4938 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4939 vabdq_s32 (int32x4_t a, int32x4_t b)
4941 int32x4_t result;
4942 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4943 : "=w"(result)
4944 : "w"(a), "w"(b)
4945 : /* No clobbers */);
4946 return result;
4949 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4950 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4952 uint8x16_t result;
4953 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4954 : "=w"(result)
4955 : "w"(a), "w"(b)
4956 : /* No clobbers */);
4957 return result;
4960 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4961 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4963 uint16x8_t result;
4964 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4965 : "=w"(result)
4966 : "w"(a), "w"(b)
4967 : /* No clobbers */);
4968 return result;
4971 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4972 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4974 uint32x4_t result;
4975 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4976 : "=w"(result)
4977 : "w"(a), "w"(b)
4978 : /* No clobbers */);
4979 return result;
/* Scalar single-precision |a - b|; %s0 selects the S (32-bit)
   view of the FP/SIMD register.  */
4982 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4983 vabds_f32 (float32_t a, float32_t b)
4985 float32_t result;
4986 __asm__ ("fabd %s0, %s1, %s2"
4987 : "=w"(result)
4988 : "w"(a), "w"(b)
4989 : /* No clobbers */);
4990 return result;
/* Widening add-across-vector (SADDLV/UADDLV): sums all lanes of the
   input into a single scalar of twice the element width.  %h0/%s0/%d0
   select the 16/32/64-bit view of the destination register, matching
   the widened result size.  */
4993 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4994 vaddlv_s8 (int8x8_t a)
4996 int16_t result;
4997 __asm__ ("saddlv %h0,%1.8b"
4998 : "=w"(result)
4999 : "w"(a)
5000 : /* No clobbers */);
5001 return result;
5004 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5005 vaddlv_s16 (int16x4_t a)
5007 int32_t result;
5008 __asm__ ("saddlv %s0,%1.4h"
5009 : "=w"(result)
5010 : "w"(a)
5011 : /* No clobbers */);
5012 return result;
5015 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5016 vaddlv_u8 (uint8x8_t a)
5018 uint16_t result;
5019 __asm__ ("uaddlv %h0,%1.8b"
5020 : "=w"(result)
5021 : "w"(a)
5022 : /* No clobbers */);
5023 return result;
5026 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5027 vaddlv_u16 (uint16x4_t a)
5029 uint32_t result;
5030 __asm__ ("uaddlv %s0,%1.4h"
5031 : "=w"(result)
5032 : "w"(a)
5033 : /* No clobbers */);
5034 return result;
/* 128-bit (quad) source variants of the same reduction.  */
5037 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5038 vaddlvq_s8 (int8x16_t a)
5040 int16_t result;
5041 __asm__ ("saddlv %h0,%1.16b"
5042 : "=w"(result)
5043 : "w"(a)
5044 : /* No clobbers */);
5045 return result;
5048 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5049 vaddlvq_s16 (int16x8_t a)
5051 int32_t result;
5052 __asm__ ("saddlv %s0,%1.8h"
5053 : "=w"(result)
5054 : "w"(a)
5055 : /* No clobbers */);
5056 return result;
5059 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5060 vaddlvq_s32 (int32x4_t a)
5062 int64_t result;
5063 __asm__ ("saddlv %d0,%1.4s"
5064 : "=w"(result)
5065 : "w"(a)
5066 : /* No clobbers */);
5067 return result;
5070 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5071 vaddlvq_u8 (uint8x16_t a)
5073 uint16_t result;
5074 __asm__ ("uaddlv %h0,%1.16b"
5075 : "=w"(result)
5076 : "w"(a)
5077 : /* No clobbers */);
5078 return result;
5081 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5082 vaddlvq_u16 (uint16x8_t a)
5084 uint32_t result;
5085 __asm__ ("uaddlv %s0,%1.8h"
5086 : "=w"(result)
5087 : "w"(a)
5088 : /* No clobbers */);
5089 return result;
5092 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5093 vaddlvq_u32 (uint32x4_t a)
5095 uint64_t result;
5096 __asm__ ("uaddlv %d0,%1.4s"
5097 : "=w"(result)
5098 : "w"(a)
5099 : /* No clobbers */);
5100 return result;
/* Count leading sign bits (CLS) per lane, for 64-bit (vcls_*) and
   128-bit (vclsq_*) signed vectors.  */
5103 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5104 vcls_s8 (int8x8_t a)
5106 int8x8_t result;
5107 __asm__ ("cls %0.8b,%1.8b"
5108 : "=w"(result)
5109 : "w"(a)
5110 : /* No clobbers */);
5111 return result;
5114 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5115 vcls_s16 (int16x4_t a)
5117 int16x4_t result;
5118 __asm__ ("cls %0.4h,%1.4h"
5119 : "=w"(result)
5120 : "w"(a)
5121 : /* No clobbers */);
5122 return result;
5125 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5126 vcls_s32 (int32x2_t a)
5128 int32x2_t result;
5129 __asm__ ("cls %0.2s,%1.2s"
5130 : "=w"(result)
5131 : "w"(a)
5132 : /* No clobbers */);
5133 return result;
5136 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5137 vclsq_s8 (int8x16_t a)
5139 int8x16_t result;
5140 __asm__ ("cls %0.16b,%1.16b"
5141 : "=w"(result)
5142 : "w"(a)
5143 : /* No clobbers */);
5144 return result;
5147 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5148 vclsq_s16 (int16x8_t a)
5150 int16x8_t result;
5151 __asm__ ("cls %0.8h,%1.8h"
5152 : "=w"(result)
5153 : "w"(a)
5154 : /* No clobbers */);
5155 return result;
5158 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5159 vclsq_s32 (int32x4_t a)
5161 int32x4_t result;
5162 __asm__ ("cls %0.4s,%1.4s"
5163 : "=w"(result)
5164 : "w"(a)
5165 : /* No clobbers */);
5166 return result;
/* Population count (CNT) per byte lane; poly/signed/unsigned variants
   only differ in the C-level element type — the instruction always
   operates on 8-bit lanes.  */
5169 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5170 vcnt_p8 (poly8x8_t a)
5172 poly8x8_t result;
5173 __asm__ ("cnt %0.8b,%1.8b"
5174 : "=w"(result)
5175 : "w"(a)
5176 : /* No clobbers */);
5177 return result;
5180 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5181 vcnt_s8 (int8x8_t a)
5183 int8x8_t result;
5184 __asm__ ("cnt %0.8b,%1.8b"
5185 : "=w"(result)
5186 : "w"(a)
5187 : /* No clobbers */);
5188 return result;
5191 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5192 vcnt_u8 (uint8x8_t a)
5194 uint8x8_t result;
5195 __asm__ ("cnt %0.8b,%1.8b"
5196 : "=w"(result)
5197 : "w"(a)
5198 : /* No clobbers */);
5199 return result;
5202 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5203 vcntq_p8 (poly8x16_t a)
5205 poly8x16_t result;
5206 __asm__ ("cnt %0.16b,%1.16b"
5207 : "=w"(result)
5208 : "w"(a)
5209 : /* No clobbers */);
5210 return result;
5213 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5214 vcntq_s8 (int8x16_t a)
5216 int8x16_t result;
5217 __asm__ ("cnt %0.16b,%1.16b"
5218 : "=w"(result)
5219 : "w"(a)
5220 : /* No clobbers */);
5221 return result;
5224 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5225 vcntq_u8 (uint8x16_t a)
5227 uint8x16_t result;
5228 __asm__ ("cnt %0.16b,%1.16b"
5229 : "=w"(result)
5230 : "w"(a)
5231 : /* No clobbers */);
5232 return result;
/* vcopyq_lane_<t>(a, b, c, d): insert lane d of vector c into lane b
   of vector a (INS), returning the modified copy of a.  These must be
   macros because the lane numbers b and d are "i" (immediate)
   operands, so they have to be compile-time constants.  Arguments are
   captured into a_/c_ first so each is evaluated exactly once;
   "0"(a_) ties a to the output register that INS modifies in place.
   Comments are kept outside the macro bodies so no continuation
   lines change.  */
5235 #define vcopyq_lane_f32(a, b, c, d) \
5236 __extension__ \
5237 ({ \
5238 float32x4_t c_ = (c); \
5239 float32x4_t a_ = (a); \
5240 float32x4_t result; \
5241 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5242 : "=w"(result) \
5243 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5244 : /* No clobbers */); \
5245 result; \
5248 #define vcopyq_lane_f64(a, b, c, d) \
5249 __extension__ \
5250 ({ \
5251 float64x2_t c_ = (c); \
5252 float64x2_t a_ = (a); \
5253 float64x2_t result; \
5254 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5255 : "=w"(result) \
5256 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5257 : /* No clobbers */); \
5258 result; \
5261 #define vcopyq_lane_p8(a, b, c, d) \
5262 __extension__ \
5263 ({ \
5264 poly8x16_t c_ = (c); \
5265 poly8x16_t a_ = (a); \
5266 poly8x16_t result; \
5267 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5268 : "=w"(result) \
5269 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5270 : /* No clobbers */); \
5271 result; \
5274 #define vcopyq_lane_p16(a, b, c, d) \
5275 __extension__ \
5276 ({ \
5277 poly16x8_t c_ = (c); \
5278 poly16x8_t a_ = (a); \
5279 poly16x8_t result; \
5280 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5281 : "=w"(result) \
5282 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5283 : /* No clobbers */); \
5284 result; \
5287 #define vcopyq_lane_s8(a, b, c, d) \
5288 __extension__ \
5289 ({ \
5290 int8x16_t c_ = (c); \
5291 int8x16_t a_ = (a); \
5292 int8x16_t result; \
5293 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5294 : "=w"(result) \
5295 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5296 : /* No clobbers */); \
5297 result; \
5300 #define vcopyq_lane_s16(a, b, c, d) \
5301 __extension__ \
5302 ({ \
5303 int16x8_t c_ = (c); \
5304 int16x8_t a_ = (a); \
5305 int16x8_t result; \
5306 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5307 : "=w"(result) \
5308 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5309 : /* No clobbers */); \
5310 result; \
5313 #define vcopyq_lane_s32(a, b, c, d) \
5314 __extension__ \
5315 ({ \
5316 int32x4_t c_ = (c); \
5317 int32x4_t a_ = (a); \
5318 int32x4_t result; \
5319 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5320 : "=w"(result) \
5321 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5322 : /* No clobbers */); \
5323 result; \
5326 #define vcopyq_lane_s64(a, b, c, d) \
5327 __extension__ \
5328 ({ \
5329 int64x2_t c_ = (c); \
5330 int64x2_t a_ = (a); \
5331 int64x2_t result; \
5332 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5333 : "=w"(result) \
5334 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5335 : /* No clobbers */); \
5336 result; \
5339 #define vcopyq_lane_u8(a, b, c, d) \
5340 __extension__ \
5341 ({ \
5342 uint8x16_t c_ = (c); \
5343 uint8x16_t a_ = (a); \
5344 uint8x16_t result; \
5345 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5346 : "=w"(result) \
5347 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5348 : /* No clobbers */); \
5349 result; \
5352 #define vcopyq_lane_u16(a, b, c, d) \
5353 __extension__ \
5354 ({ \
5355 uint16x8_t c_ = (c); \
5356 uint16x8_t a_ = (a); \
5357 uint16x8_t result; \
5358 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5359 : "=w"(result) \
5360 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5361 : /* No clobbers */); \
5362 result; \
5365 #define vcopyq_lane_u32(a, b, c, d) \
5366 __extension__ \
5367 ({ \
5368 uint32x4_t c_ = (c); \
5369 uint32x4_t a_ = (a); \
5370 uint32x4_t result; \
5371 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5372 : "=w"(result) \
5373 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5374 : /* No clobbers */); \
5375 result; \
5378 #define vcopyq_lane_u64(a, b, c, d) \
5379 __extension__ \
5380 ({ \
5381 uint64x2_t c_ = (c); \
5382 uint64x2_t a_ = (a); \
5383 uint64x2_t result; \
5384 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5385 : "=w"(result) \
5386 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5387 : /* No clobbers */); \
5388 result; \
5391 /* vcvt_f16_f32 not supported */
5393 /* vcvt_f32_f16 not supported */
5395 /* vcvt_high_f16_f32 not supported */
5397 /* vcvt_high_f32_f16 not supported */
5399 static float32x2_t vdup_n_f32 (float32_t);
/* Fixed-point <-> floating-point conversions with an immediate
   fractional-bit count b (SCVTF/UCVTF for int->float, FCVTZS/FCVTZU
   for float->int, rounding toward zero).  Macros, not functions,
   because b is an "i" (immediate) operand and must be a compile-time
   constant.  Naming: vcvt*/vcvtq* operate on 64/128-bit vectors;
   vcvts_*/vcvtd_* are the 32-bit and 64-bit scalar forms (%s / %d
   register views).  Comments stay outside the macro bodies so no
   continuation lines change.  */
5401 #define vcvt_n_f32_s32(a, b) \
5402 __extension__ \
5403 ({ \
5404 int32x2_t a_ = (a); \
5405 float32x2_t result; \
5406 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5407 : "=w"(result) \
5408 : "w"(a_), "i"(b) \
5409 : /* No clobbers */); \
5410 result; \
5413 #define vcvt_n_f32_u32(a, b) \
5414 __extension__ \
5415 ({ \
5416 uint32x2_t a_ = (a); \
5417 float32x2_t result; \
5418 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5419 : "=w"(result) \
5420 : "w"(a_), "i"(b) \
5421 : /* No clobbers */); \
5422 result; \
5425 #define vcvt_n_s32_f32(a, b) \
5426 __extension__ \
5427 ({ \
5428 float32x2_t a_ = (a); \
5429 int32x2_t result; \
5430 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
5431 : "=w"(result) \
5432 : "w"(a_), "i"(b) \
5433 : /* No clobbers */); \
5434 result; \
5437 #define vcvt_n_u32_f32(a, b) \
5438 __extension__ \
5439 ({ \
5440 float32x2_t a_ = (a); \
5441 uint32x2_t result; \
5442 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
5443 : "=w"(result) \
5444 : "w"(a_), "i"(b) \
5445 : /* No clobbers */); \
5446 result; \
5449 #define vcvtd_n_f64_s64(a, b) \
5450 __extension__ \
5451 ({ \
5452 int64_t a_ = (a); \
5453 float64_t result; \
5454 __asm__ ("scvtf %d0,%d1,%2" \
5455 : "=w"(result) \
5456 : "w"(a_), "i"(b) \
5457 : /* No clobbers */); \
5458 result; \
5461 #define vcvtd_n_f64_u64(a, b) \
5462 __extension__ \
5463 ({ \
5464 uint64_t a_ = (a); \
5465 float64_t result; \
5466 __asm__ ("ucvtf %d0,%d1,%2" \
5467 : "=w"(result) \
5468 : "w"(a_), "i"(b) \
5469 : /* No clobbers */); \
5470 result; \
5473 #define vcvtd_n_s64_f64(a, b) \
5474 __extension__ \
5475 ({ \
5476 float64_t a_ = (a); \
5477 int64_t result; \
5478 __asm__ ("fcvtzs %d0,%d1,%2" \
5479 : "=w"(result) \
5480 : "w"(a_), "i"(b) \
5481 : /* No clobbers */); \
5482 result; \
5485 #define vcvtd_n_u64_f64(a, b) \
5486 __extension__ \
5487 ({ \
5488 float64_t a_ = (a); \
5489 uint64_t result; \
5490 __asm__ ("fcvtzu %d0,%d1,%2" \
5491 : "=w"(result) \
5492 : "w"(a_), "i"(b) \
5493 : /* No clobbers */); \
5494 result; \
5497 #define vcvtq_n_f32_s32(a, b) \
5498 __extension__ \
5499 ({ \
5500 int32x4_t a_ = (a); \
5501 float32x4_t result; \
5502 __asm__ ("scvtf %0.4s, %1.4s, #%2" \
5503 : "=w"(result) \
5504 : "w"(a_), "i"(b) \
5505 : /* No clobbers */); \
5506 result; \
5509 #define vcvtq_n_f32_u32(a, b) \
5510 __extension__ \
5511 ({ \
5512 uint32x4_t a_ = (a); \
5513 float32x4_t result; \
5514 __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
5515 : "=w"(result) \
5516 : "w"(a_), "i"(b) \
5517 : /* No clobbers */); \
5518 result; \
5521 #define vcvtq_n_f64_s64(a, b) \
5522 __extension__ \
5523 ({ \
5524 int64x2_t a_ = (a); \
5525 float64x2_t result; \
5526 __asm__ ("scvtf %0.2d, %1.2d, #%2" \
5527 : "=w"(result) \
5528 : "w"(a_), "i"(b) \
5529 : /* No clobbers */); \
5530 result; \
5533 #define vcvtq_n_f64_u64(a, b) \
5534 __extension__ \
5535 ({ \
5536 uint64x2_t a_ = (a); \
5537 float64x2_t result; \
5538 __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
5539 : "=w"(result) \
5540 : "w"(a_), "i"(b) \
5541 : /* No clobbers */); \
5542 result; \
5545 #define vcvtq_n_s32_f32(a, b) \
5546 __extension__ \
5547 ({ \
5548 float32x4_t a_ = (a); \
5549 int32x4_t result; \
5550 __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
5551 : "=w"(result) \
5552 : "w"(a_), "i"(b) \
5553 : /* No clobbers */); \
5554 result; \
5557 #define vcvtq_n_s64_f64(a, b) \
5558 __extension__ \
5559 ({ \
5560 float64x2_t a_ = (a); \
5561 int64x2_t result; \
5562 __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
5563 : "=w"(result) \
5564 : "w"(a_), "i"(b) \
5565 : /* No clobbers */); \
5566 result; \
5569 #define vcvtq_n_u32_f32(a, b) \
5570 __extension__ \
5571 ({ \
5572 float32x4_t a_ = (a); \
5573 uint32x4_t result; \
5574 __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
5575 : "=w"(result) \
5576 : "w"(a_), "i"(b) \
5577 : /* No clobbers */); \
5578 result; \
5581 #define vcvtq_n_u64_f64(a, b) \
5582 __extension__ \
5583 ({ \
5584 float64x2_t a_ = (a); \
5585 uint64x2_t result; \
5586 __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
5587 : "=w"(result) \
5588 : "w"(a_), "i"(b) \
5589 : /* No clobbers */); \
5590 result; \
5593 #define vcvts_n_f32_s32(a, b) \
5594 __extension__ \
5595 ({ \
5596 int32_t a_ = (a); \
5597 float32_t result; \
5598 __asm__ ("scvtf %s0,%s1,%2" \
5599 : "=w"(result) \
5600 : "w"(a_), "i"(b) \
5601 : /* No clobbers */); \
5602 result; \
5605 #define vcvts_n_f32_u32(a, b) \
5606 __extension__ \
5607 ({ \
5608 uint32_t a_ = (a); \
5609 float32_t result; \
5610 __asm__ ("ucvtf %s0,%s1,%2" \
5611 : "=w"(result) \
5612 : "w"(a_), "i"(b) \
5613 : /* No clobbers */); \
5614 result; \
5617 #define vcvts_n_s32_f32(a, b) \
5618 __extension__ \
5619 ({ \
5620 float32_t a_ = (a); \
5621 int32_t result; \
5622 __asm__ ("fcvtzs %s0,%s1,%2" \
5623 : "=w"(result) \
5624 : "w"(a_), "i"(b) \
5625 : /* No clobbers */); \
5626 result; \
5629 #define vcvts_n_u32_f32(a, b) \
5630 __extension__ \
5631 ({ \
5632 float32_t a_ = (a); \
5633 uint32_t result; \
5634 __asm__ ("fcvtzu %s0,%s1,%2" \
5635 : "=w"(result) \
5636 : "w"(a_), "i"(b) \
5637 : /* No clobbers */); \
5638 result; \
/* Double -> single narrowing conversions using the FCVTXN family
   (round-to-odd).  */
5641 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5642 vcvtx_f32_f64 (float64x2_t a)
5644 float32x2_t result;
5645 __asm__ ("fcvtxn %0.2s,%1.2d"
5646 : "=w"(result)
5647 : "w"(a)
5648 : /* No clobbers */);
5649 return result;
/* FCVTXN2 writes the narrowed values into the upper half of %0, so
   "0"(a) preloads the destination with a to preserve its low half;
   %1 is the wide input b.  */
5652 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5653 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5655 float32x4_t result;
5656 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5657 : "=w"(result)
5658 : "w" (b), "0"(a)
5659 : /* No clobbers */);
5660 return result;
/* Scalar form: one float64 narrowed to float32.  */
5663 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5664 vcvtxd_f32_f64 (float64_t a)
5666 float32_t result;
5667 __asm__ ("fcvtxn %s0,%d1"
5668 : "=w"(result)
5669 : "w"(a)
5670 : /* No clobbers */);
5671 return result;
/* Fused multiply-accumulate: FMLA computes a + b * c per lane with a
   single rounding; FMLS computes a - b * c.  "0"(a) ties the
   accumulator to the destination register the instruction updates
   in place.  The _n_ variants multiply by a scalar broadcast from
   lane 0 of the register holding c (%3.s[0] / %3.d[0]).  */
5674 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5675 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5677 float32x2_t result;
5678 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5679 : "=w"(result)
5680 : "0"(a), "w"(b), "w"(c)
5681 : /* No clobbers */);
5682 return result;
5685 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5686 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5688 float32x4_t result;
5689 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5690 : "=w"(result)
5691 : "0"(a), "w"(b), "w"(c)
5692 : /* No clobbers */);
5693 return result;
5696 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5697 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5699 float64x2_t result;
5700 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
5701 : "=w"(result)
5702 : "0"(a), "w"(b), "w"(c)
5703 : /* No clobbers */);
5704 return result;
5707 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5708 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5710 float32x2_t result;
5711 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
5712 : "=w"(result)
5713 : "0"(a), "w"(b), "w"(c)
5714 : /* No clobbers */);
5715 return result;
5718 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5719 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
5721 float32x4_t result;
5722 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
5723 : "=w"(result)
5724 : "0"(a), "w"(b), "w"(c)
5725 : /* No clobbers */);
5726 return result;
5729 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5730 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
5732 float64x2_t result;
5733 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
5734 : "=w"(result)
5735 : "0"(a), "w"(b), "w"(c)
5736 : /* No clobbers */);
5737 return result;
/* Fused multiply-subtract variants (a - b * c).  */
5740 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5741 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5743 float32x2_t result;
5744 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
5745 : "=w"(result)
5746 : "0"(a), "w"(b), "w"(c)
5747 : /* No clobbers */);
5748 return result;
5751 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5752 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5754 float32x4_t result;
5755 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
5756 : "=w"(result)
5757 : "0"(a), "w"(b), "w"(c)
5758 : /* No clobbers */);
5759 return result;
5762 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5763 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5765 float64x2_t result;
5766 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
5767 : "=w"(result)
5768 : "0"(a), "w"(b), "w"(c)
5769 : /* No clobbers */);
5770 return result;
/* Extract the upper 64 bits of a 128-bit vector: INS copies the
   source's doubleword 1 into doubleword 0 of the destination, which
   is then returned as the corresponding 64-bit vector type.  All
   variants below are identical except for the element type.  */
5773 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5774 vget_high_f32 (float32x4_t a)
5776 float32x2_t result;
5777 __asm__ ("ins %0.d[0], %1.d[1]"
5778 : "=w"(result)
5779 : "w"(a)
5780 : /* No clobbers */);
5781 return result;
5784 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5785 vget_high_f64 (float64x2_t a)
5787 float64x1_t result;
5788 __asm__ ("ins %0.d[0], %1.d[1]"
5789 : "=w"(result)
5790 : "w"(a)
5791 : /* No clobbers */);
5792 return result;
5795 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5796 vget_high_p8 (poly8x16_t a)
5798 poly8x8_t result;
5799 __asm__ ("ins %0.d[0], %1.d[1]"
5800 : "=w"(result)
5801 : "w"(a)
5802 : /* No clobbers */);
5803 return result;
5806 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5807 vget_high_p16 (poly16x8_t a)
5809 poly16x4_t result;
5810 __asm__ ("ins %0.d[0], %1.d[1]"
5811 : "=w"(result)
5812 : "w"(a)
5813 : /* No clobbers */);
5814 return result;
5817 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5818 vget_high_s8 (int8x16_t a)
5820 int8x8_t result;
5821 __asm__ ("ins %0.d[0], %1.d[1]"
5822 : "=w"(result)
5823 : "w"(a)
5824 : /* No clobbers */);
5825 return result;
5828 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5829 vget_high_s16 (int16x8_t a)
5831 int16x4_t result;
5832 __asm__ ("ins %0.d[0], %1.d[1]"
5833 : "=w"(result)
5834 : "w"(a)
5835 : /* No clobbers */);
5836 return result;
5839 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5840 vget_high_s32 (int32x4_t a)
5842 int32x2_t result;
5843 __asm__ ("ins %0.d[0], %1.d[1]"
5844 : "=w"(result)
5845 : "w"(a)
5846 : /* No clobbers */);
5847 return result;
5850 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5851 vget_high_s64 (int64x2_t a)
5853 int64x1_t result;
5854 __asm__ ("ins %0.d[0], %1.d[1]"
5855 : "=w"(result)
5856 : "w"(a)
5857 : /* No clobbers */);
5858 return result;
5861 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5862 vget_high_u8 (uint8x16_t a)
5864 uint8x8_t result;
5865 __asm__ ("ins %0.d[0], %1.d[1]"
5866 : "=w"(result)
5867 : "w"(a)
5868 : /* No clobbers */);
5869 return result;
5872 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5873 vget_high_u16 (uint16x8_t a)
5875 uint16x4_t result;
5876 __asm__ ("ins %0.d[0], %1.d[1]"
5877 : "=w"(result)
5878 : "w"(a)
5879 : /* No clobbers */);
5880 return result;
5883 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5884 vget_high_u32 (uint32x4_t a)
5886 uint32x2_t result;
5887 __asm__ ("ins %0.d[0], %1.d[1]"
5888 : "=w"(result)
5889 : "w"(a)
5890 : /* No clobbers */);
5891 return result;
5894 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5895 vget_high_u64 (uint64x2_t a)
5897 uint64x1_t result;
5898 __asm__ ("ins %0.d[0], %1.d[1]"
5899 : "=w"(result)
5900 : "w"(a)
5901 : /* No clobbers */);
5902 return result;
/* Halving subtract (SHSUB/UHSUB): per lane (a - b) >> 1, computed
   without intermediate overflow.  vhsub_* operate on 64-bit vectors,
   vhsubq_* on 128-bit; signed/unsigned select SHSUB vs UHSUB.  */
5905 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5906 vhsub_s8 (int8x8_t a, int8x8_t b)
5908 int8x8_t result;
5909 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
5910 : "=w"(result)
5911 : "w"(a), "w"(b)
5912 : /* No clobbers */);
5913 return result;
5916 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5917 vhsub_s16 (int16x4_t a, int16x4_t b)
5919 int16x4_t result;
5920 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
5921 : "=w"(result)
5922 : "w"(a), "w"(b)
5923 : /* No clobbers */);
5924 return result;
5927 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5928 vhsub_s32 (int32x2_t a, int32x2_t b)
5930 int32x2_t result;
5931 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
5932 : "=w"(result)
5933 : "w"(a), "w"(b)
5934 : /* No clobbers */);
5935 return result;
5938 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5939 vhsub_u8 (uint8x8_t a, uint8x8_t b)
5941 uint8x8_t result;
5942 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
5943 : "=w"(result)
5944 : "w"(a), "w"(b)
5945 : /* No clobbers */);
5946 return result;
5949 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5950 vhsub_u16 (uint16x4_t a, uint16x4_t b)
5952 uint16x4_t result;
5953 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
5954 : "=w"(result)
5955 : "w"(a), "w"(b)
5956 : /* No clobbers */);
5957 return result;
5960 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5961 vhsub_u32 (uint32x2_t a, uint32x2_t b)
5963 uint32x2_t result;
5964 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
5965 : "=w"(result)
5966 : "w"(a), "w"(b)
5967 : /* No clobbers */);
5968 return result;
5971 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5972 vhsubq_s8 (int8x16_t a, int8x16_t b)
5974 int8x16_t result;
5975 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
5976 : "=w"(result)
5977 : "w"(a), "w"(b)
5978 : /* No clobbers */);
5979 return result;
5982 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5983 vhsubq_s16 (int16x8_t a, int16x8_t b)
5985 int16x8_t result;
5986 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
5987 : "=w"(result)
5988 : "w"(a), "w"(b)
5989 : /* No clobbers */);
5990 return result;
5993 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5994 vhsubq_s32 (int32x4_t a, int32x4_t b)
5996 int32x4_t result;
5997 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
5998 : "=w"(result)
5999 : "w"(a), "w"(b)
6000 : /* No clobbers */);
6001 return result;
6004 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6005 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6007 uint8x16_t result;
6008 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6009 : "=w"(result)
6010 : "w"(a), "w"(b)
6011 : /* No clobbers */);
6012 return result;
6015 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6016 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6018 uint16x8_t result;
6019 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6020 : "=w"(result)
6021 : "w"(a), "w"(b)
6022 : /* No clobbers */);
6023 return result;
6026 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6027 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6029 uint32x4_t result;
6030 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6031 : "=w"(result)
6032 : "w"(a), "w"(b)
6033 : /* No clobbers */);
6034 return result;
6037 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6038 vld1_dup_f32 (const float32_t * a)
6040 float32x2_t result;
6041 __asm__ ("ld1r {%0.2s}, %1"
6042 : "=w"(result)
6043 : "Utv"(*a)
6044 : /* No clobbers */);
6045 return result;
6048 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6049 vld1_dup_f64 (const float64_t * a)
6051 float64x1_t result;
6052 __asm__ ("ld1r {%0.1d}, %1"
6053 : "=w"(result)
6054 : "Utv"(*a)
6055 : /* No clobbers */);
6056 return result;
6059 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6060 vld1_dup_p8 (const poly8_t * a)
6062 poly8x8_t result;
6063 __asm__ ("ld1r {%0.8b}, %1"
6064 : "=w"(result)
6065 : "Utv"(*a)
6066 : /* No clobbers */);
6067 return result;
6070 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6071 vld1_dup_p16 (const poly16_t * a)
6073 poly16x4_t result;
6074 __asm__ ("ld1r {%0.4h}, %1"
6075 : "=w"(result)
6076 : "Utv"(*a)
6077 : /* No clobbers */);
6078 return result;
6081 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6082 vld1_dup_s8 (const int8_t * a)
6084 int8x8_t result;
6085 __asm__ ("ld1r {%0.8b}, %1"
6086 : "=w"(result)
6087 : "Utv"(*a)
6088 : /* No clobbers */);
6089 return result;
6092 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6093 vld1_dup_s16 (const int16_t * a)
6095 int16x4_t result;
6096 __asm__ ("ld1r {%0.4h}, %1"
6097 : "=w"(result)
6098 : "Utv"(*a)
6099 : /* No clobbers */);
6100 return result;
6103 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6104 vld1_dup_s32 (const int32_t * a)
6106 int32x2_t result;
6107 __asm__ ("ld1r {%0.2s}, %1"
6108 : "=w"(result)
6109 : "Utv"(*a)
6110 : /* No clobbers */);
6111 return result;
6114 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6115 vld1_dup_s64 (const int64_t * a)
6117 int64x1_t result;
6118 __asm__ ("ld1r {%0.1d}, %1"
6119 : "=w"(result)
6120 : "Utv"(*a)
6121 : /* No clobbers */);
6122 return result;
6125 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6126 vld1_dup_u8 (const uint8_t * a)
6128 uint8x8_t result;
6129 __asm__ ("ld1r {%0.8b}, %1"
6130 : "=w"(result)
6131 : "Utv"(*a)
6132 : /* No clobbers */);
6133 return result;
6136 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6137 vld1_dup_u16 (const uint16_t * a)
6139 uint16x4_t result;
6140 __asm__ ("ld1r {%0.4h}, %1"
6141 : "=w"(result)
6142 : "Utv"(*a)
6143 : /* No clobbers */);
6144 return result;
6147 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6148 vld1_dup_u32 (const uint32_t * a)
6150 uint32x2_t result;
6151 __asm__ ("ld1r {%0.2s}, %1"
6152 : "=w"(result)
6153 : "Utv"(*a)
6154 : /* No clobbers */);
6155 return result;
6158 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6159 vld1_dup_u64 (const uint64_t * a)
6161 uint64x1_t result;
6162 __asm__ ("ld1r {%0.1d}, %1"
6163 : "=w"(result)
6164 : "Utv"(*a)
6165 : /* No clobbers */);
6166 return result;
/* Load-to-lane intrinsics (64-bit vectors): LD1 (single structure)
   loads one element from memory into lane C of vector B, leaving the
   other lanes untouched.  Implemented as statement-expression macros
   because the lane number must be an immediate ("i" constraint).  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6325 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6326 vld1q_dup_f32 (const float32_t * a)
6328 float32x4_t result;
6329 __asm__ ("ld1r {%0.4s}, %1"
6330 : "=w"(result)
6331 : "Utv"(*a)
6332 : /* No clobbers */);
6333 return result;
6336 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6337 vld1q_dup_f64 (const float64_t * a)
6339 float64x2_t result;
6340 __asm__ ("ld1r {%0.2d}, %1"
6341 : "=w"(result)
6342 : "Utv"(*a)
6343 : /* No clobbers */);
6344 return result;
6347 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6348 vld1q_dup_p8 (const poly8_t * a)
6350 poly8x16_t result;
6351 __asm__ ("ld1r {%0.16b}, %1"
6352 : "=w"(result)
6353 : "Utv"(*a)
6354 : /* No clobbers */);
6355 return result;
6358 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6359 vld1q_dup_p16 (const poly16_t * a)
6361 poly16x8_t result;
6362 __asm__ ("ld1r {%0.8h}, %1"
6363 : "=w"(result)
6364 : "Utv"(*a)
6365 : /* No clobbers */);
6366 return result;
6369 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6370 vld1q_dup_s8 (const int8_t * a)
6372 int8x16_t result;
6373 __asm__ ("ld1r {%0.16b}, %1"
6374 : "=w"(result)
6375 : "Utv"(*a)
6376 : /* No clobbers */);
6377 return result;
6380 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6381 vld1q_dup_s16 (const int16_t * a)
6383 int16x8_t result;
6384 __asm__ ("ld1r {%0.8h}, %1"
6385 : "=w"(result)
6386 : "Utv"(*a)
6387 : /* No clobbers */);
6388 return result;
6391 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6392 vld1q_dup_s32 (const int32_t * a)
6394 int32x4_t result;
6395 __asm__ ("ld1r {%0.4s}, %1"
6396 : "=w"(result)
6397 : "Utv"(*a)
6398 : /* No clobbers */);
6399 return result;
6402 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6403 vld1q_dup_s64 (const int64_t * a)
6405 int64x2_t result;
6406 __asm__ ("ld1r {%0.2d}, %1"
6407 : "=w"(result)
6408 : "Utv"(*a)
6409 : /* No clobbers */);
6410 return result;
6413 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6414 vld1q_dup_u8 (const uint8_t * a)
6416 uint8x16_t result;
6417 __asm__ ("ld1r {%0.16b}, %1"
6418 : "=w"(result)
6419 : "Utv"(*a)
6420 : /* No clobbers */);
6421 return result;
6424 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6425 vld1q_dup_u16 (const uint16_t * a)
6427 uint16x8_t result;
6428 __asm__ ("ld1r {%0.8h}, %1"
6429 : "=w"(result)
6430 : "Utv"(*a)
6431 : /* No clobbers */);
6432 return result;
6435 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6436 vld1q_dup_u32 (const uint32_t * a)
6438 uint32x4_t result;
6439 __asm__ ("ld1r {%0.4s}, %1"
6440 : "=w"(result)
6441 : "Utv"(*a)
6442 : /* No clobbers */);
6443 return result;
6446 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6447 vld1q_dup_u64 (const uint64_t * a)
6449 uint64x2_t result;
6450 __asm__ ("ld1r {%0.2d}, %1"
6451 : "=w"(result)
6452 : "Utv"(*a)
6453 : /* No clobbers */);
6454 return result;
/* Load-to-lane intrinsics (128-bit vectors): LD1 (single structure)
   loads one element from memory into lane C of vector B.  Macros, so
   the lane number stays a compile-time immediate.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6613 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6614 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6616 float32x2_t result;
6617 float32x2_t t1;
6618 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6619 : "=w"(result), "=w"(t1)
6620 : "0"(a), "w"(b), "w"(c)
6621 : /* No clobbers */);
6622 return result;
6625 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6626 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6628 int16x4_t result;
6629 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6630 : "=w"(result)
6631 : "0"(a), "w"(b), "x"(c)
6632 : /* No clobbers */);
6633 return result;
6636 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6637 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6639 int32x2_t result;
6640 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6641 : "=w"(result)
6642 : "0"(a), "w"(b), "w"(c)
6643 : /* No clobbers */);
6644 return result;
6647 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6648 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6650 uint16x4_t result;
6651 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6652 : "=w"(result)
6653 : "0"(a), "w"(b), "x"(c)
6654 : /* No clobbers */);
6655 return result;
6658 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6659 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6661 uint32x2_t result;
6662 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6663 : "=w"(result)
6664 : "0"(a), "w"(b), "w"(c)
6665 : /* No clobbers */);
6666 return result;
6669 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6670 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6672 int8x8_t result;
6673 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6674 : "=w"(result)
6675 : "0"(a), "w"(b), "w"(c)
6676 : /* No clobbers */);
6677 return result;
6680 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6681 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6683 int16x4_t result;
6684 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6685 : "=w"(result)
6686 : "0"(a), "w"(b), "w"(c)
6687 : /* No clobbers */);
6688 return result;
6691 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6692 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6694 int32x2_t result;
6695 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6696 : "=w"(result)
6697 : "0"(a), "w"(b), "w"(c)
6698 : /* No clobbers */);
6699 return result;
6702 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6703 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6705 uint8x8_t result;
6706 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6707 : "=w"(result)
6708 : "0"(a), "w"(b), "w"(c)
6709 : /* No clobbers */);
6710 return result;
6713 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6714 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6716 uint16x4_t result;
6717 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6718 : "=w"(result)
6719 : "0"(a), "w"(b), "w"(c)
6720 : /* No clobbers */);
6721 return result;
6724 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6725 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6727 uint32x2_t result;
6728 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6729 : "=w"(result)
6730 : "0"(a), "w"(b), "w"(c)
6731 : /* No clobbers */);
6732 return result;
/* Widening multiply-accumulate by lane, high half: SMLAL2/UMLAL2
   multiply the upper half of B by lane D of C and accumulate into A.
   _lane_ takes a 64-bit lane vector, _laneq_ a 128-bit one.  Macros so
   the lane index stays an immediate.  */

#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6847 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6848 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6850 int32x4_t result;
6851 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6852 : "=w"(result)
6853 : "0"(a), "w"(b), "x"(c)
6854 : /* No clobbers */);
6855 return result;
6858 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6859 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6861 int64x2_t result;
6862 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6863 : "=w"(result)
6864 : "0"(a), "w"(b), "w"(c)
6865 : /* No clobbers */);
6866 return result;
6869 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6870 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6872 uint32x4_t result;
6873 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6874 : "=w"(result)
6875 : "0"(a), "w"(b), "x"(c)
6876 : /* No clobbers */);
6877 return result;
6880 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6881 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6883 uint64x2_t result;
6884 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6885 : "=w"(result)
6886 : "0"(a), "w"(b), "w"(c)
6887 : /* No clobbers */);
6888 return result;
6891 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6892 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6894 int16x8_t result;
6895 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6896 : "=w"(result)
6897 : "0"(a), "w"(b), "w"(c)
6898 : /* No clobbers */);
6899 return result;
6902 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6903 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6905 int32x4_t result;
6906 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6907 : "=w"(result)
6908 : "0"(a), "w"(b), "w"(c)
6909 : /* No clobbers */);
6910 return result;
6913 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6914 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6916 int64x2_t result;
6917 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6918 : "=w"(result)
6919 : "0"(a), "w"(b), "w"(c)
6920 : /* No clobbers */);
6921 return result;
6924 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6925 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6927 uint16x8_t result;
6928 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6929 : "=w"(result)
6930 : "0"(a), "w"(b), "w"(c)
6931 : /* No clobbers */);
6932 return result;
6935 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6936 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6938 uint32x4_t result;
6939 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6940 : "=w"(result)
6941 : "0"(a), "w"(b), "w"(c)
6942 : /* No clobbers */);
6943 return result;
6946 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6947 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6949 uint64x2_t result;
6950 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
6951 : "=w"(result)
6952 : "0"(a), "w"(b), "w"(c)
6953 : /* No clobbers */);
6954 return result;
/* Widening multiply-accumulate by lane: SMLAL/UMLAL multiply the
   64-bit vector B by lane D of C and accumulate into the wider
   vector A.  _lane_ takes a 64-bit lane vector, _laneq_ a 128-bit
   one.  Macros keep the lane index an immediate.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7069 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7070 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7072 int32x4_t result;
7073 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7074 : "=w"(result)
7075 : "0"(a), "w"(b), "x"(c)
7076 : /* No clobbers */);
7077 return result;
7080 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7081 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7083 int64x2_t result;
7084 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7085 : "=w"(result)
7086 : "0"(a), "w"(b), "w"(c)
7087 : /* No clobbers */);
7088 return result;
7091 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7092 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7094 uint32x4_t result;
7095 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7096 : "=w"(result)
7097 : "0"(a), "w"(b), "x"(c)
7098 : /* No clobbers */);
7099 return result;
7102 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7103 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7105 uint64x2_t result;
7106 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7107 : "=w"(result)
7108 : "0"(a), "w"(b), "w"(c)
7109 : /* No clobbers */);
7110 return result;
7113 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7114 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7116 int16x8_t result;
7117 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7118 : "=w"(result)
7119 : "0"(a), "w"(b), "w"(c)
7120 : /* No clobbers */);
7121 return result;
7124 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7125 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7127 int32x4_t result;
7128 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7129 : "=w"(result)
7130 : "0"(a), "w"(b), "w"(c)
7131 : /* No clobbers */);
7132 return result;
7135 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7136 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7138 int64x2_t result;
7139 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7140 : "=w"(result)
7141 : "0"(a), "w"(b), "w"(c)
7142 : /* No clobbers */);
7143 return result;
7146 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7147 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7149 uint16x8_t result;
7150 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7151 : "=w"(result)
7152 : "0"(a), "w"(b), "w"(c)
7153 : /* No clobbers */);
7154 return result;
7157 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7158 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7160 uint32x4_t result;
7161 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7162 : "=w"(result)
7163 : "0"(a), "w"(b), "w"(c)
7164 : /* No clobbers */);
7165 return result;
7168 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7169 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7171 uint64x2_t result;
7172 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7173 : "=w"(result)
7174 : "0"(a), "w"(b), "w"(c)
7175 : /* No clobbers */);
7176 return result;
7179 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7180 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7182 float32x4_t result;
7183 float32x4_t t1;
7184 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7185 : "=w"(result), "=w"(t1)
7186 : "0"(a), "w"(b), "w"(c)
7187 : /* No clobbers */);
7188 return result;
7191 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7192 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7194 int16x8_t result;
7195 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7196 : "=w"(result)
7197 : "0"(a), "w"(b), "x"(c)
7198 : /* No clobbers */);
7199 return result;
7202 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7203 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7205 int32x4_t result;
7206 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7207 : "=w"(result)
7208 : "0"(a), "w"(b), "w"(c)
7209 : /* No clobbers */);
7210 return result;
7213 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7214 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7216 uint16x8_t result;
7217 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7218 : "=w"(result)
7219 : "0"(a), "w"(b), "x"(c)
7220 : /* No clobbers */);
7221 return result;
7224 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7225 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7227 uint32x4_t result;
7228 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7229 : "=w"(result)
7230 : "0"(a), "w"(b), "w"(c)
7231 : /* No clobbers */);
7232 return result;
7235 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7236 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7238 int8x16_t result;
7239 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7240 : "=w"(result)
7241 : "0"(a), "w"(b), "w"(c)
7242 : /* No clobbers */);
7243 return result;
7246 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7247 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7249 int16x8_t result;
7250 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7251 : "=w"(result)
7252 : "0"(a), "w"(b), "w"(c)
7253 : /* No clobbers */);
7254 return result;
7257 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7258 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7260 int32x4_t result;
7261 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7262 : "=w"(result)
7263 : "0"(a), "w"(b), "w"(c)
7264 : /* No clobbers */);
7265 return result;
7268 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7269 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7271 uint8x16_t result;
7272 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7273 : "=w"(result)
7274 : "0"(a), "w"(b), "w"(c)
7275 : /* No clobbers */);
7276 return result;
7279 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7280 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7282 uint16x8_t result;
7283 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7284 : "=w"(result)
7285 : "0"(a), "w"(b), "w"(c)
7286 : /* No clobbers */);
7287 return result;
7290 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7291 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7293 uint32x4_t result;
7294 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7295 : "=w"(result)
7296 : "0"(a), "w"(b), "w"(c)
7297 : /* No clobbers */);
7298 return result;
7301 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7302 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7304 float32x2_t result;
7305 float32x2_t t1;
7306 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7307 : "=w"(result), "=w"(t1)
7308 : "0"(a), "w"(b), "w"(c)
7309 : /* No clobbers */);
7310 return result;
7313 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7314 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7316 int16x4_t result;
7317 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7318 : "=w"(result)
7319 : "0"(a), "w"(b), "x"(c)
7320 : /* No clobbers */);
7321 return result;
7324 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7325 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7327 int32x2_t result;
7328 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7329 : "=w"(result)
7330 : "0"(a), "w"(b), "w"(c)
7331 : /* No clobbers */);
7332 return result;
7335 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7336 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7338 uint16x4_t result;
7339 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7340 : "=w"(result)
7341 : "0"(a), "w"(b), "x"(c)
7342 : /* No clobbers */);
7343 return result;
7346 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7347 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7349 uint32x2_t result;
7350 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7351 : "=w"(result)
7352 : "0"(a), "w"(b), "w"(c)
7353 : /* No clobbers */);
7354 return result;
7357 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7358 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7360 int8x8_t result;
7361 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7362 : "=w"(result)
7363 : "0"(a), "w"(b), "w"(c)
7364 : /* No clobbers */);
7365 return result;
7368 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7369 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7371 int16x4_t result;
7372 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7373 : "=w"(result)
7374 : "0"(a), "w"(b), "w"(c)
7375 : /* No clobbers */);
7376 return result;
7379 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7380 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7382 int32x2_t result;
7383 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7384 : "=w"(result)
7385 : "0"(a), "w"(b), "w"(c)
7386 : /* No clobbers */);
7387 return result;
7390 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7391 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7393 uint8x8_t result;
7394 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7395 : "=w"(result)
7396 : "0"(a), "w"(b), "w"(c)
7397 : /* No clobbers */);
7398 return result;
7401 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7402 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7404 uint16x4_t result;
7405 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7406 : "=w"(result)
7407 : "0"(a), "w"(b), "w"(c)
7408 : /* No clobbers */);
7409 return result;
7412 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7413 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7415 uint32x2_t result;
7416 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7417 : "=w"(result)
7418 : "0"(a), "w"(b), "w"(c)
7419 : /* No clobbers */);
7420 return result;
/* vmlsl_high_lane / vmlsl_high_laneq: widening multiply-subtract using
   the high half of B and a selected lane of C (SMLSL2/UMLSL2).  Macros,
   because the lane number must be an "i" (immediate) asm operand.  The
   "lane" forms take a 64-bit lane vector, the "laneq" forms a 128-bit
   one.  "x" constrains 16-bit lane operands to V0-V15 as required by
   the by-element instruction encoding.  */

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7535 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7536 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7538 int32x4_t result;
7539 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7540 : "=w"(result)
7541 : "0"(a), "w"(b), "x"(c)
7542 : /* No clobbers */);
7543 return result;
7546 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7547 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7549 int64x2_t result;
7550 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7551 : "=w"(result)
7552 : "0"(a), "w"(b), "w"(c)
7553 : /* No clobbers */);
7554 return result;
7557 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7558 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7560 uint32x4_t result;
7561 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7562 : "=w"(result)
7563 : "0"(a), "w"(b), "x"(c)
7564 : /* No clobbers */);
7565 return result;
7568 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7569 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7571 uint64x2_t result;
7572 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7573 : "=w"(result)
7574 : "0"(a), "w"(b), "w"(c)
7575 : /* No clobbers */);
7576 return result;
7579 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7580 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7582 int16x8_t result;
7583 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7584 : "=w"(result)
7585 : "0"(a), "w"(b), "w"(c)
7586 : /* No clobbers */);
7587 return result;
7590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7591 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7593 int32x4_t result;
7594 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7595 : "=w"(result)
7596 : "0"(a), "w"(b), "w"(c)
7597 : /* No clobbers */);
7598 return result;
7601 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7602 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7604 int64x2_t result;
7605 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7606 : "=w"(result)
7607 : "0"(a), "w"(b), "w"(c)
7608 : /* No clobbers */);
7609 return result;
7612 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7613 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7615 uint16x8_t result;
7616 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7617 : "=w"(result)
7618 : "0"(a), "w"(b), "w"(c)
7619 : /* No clobbers */);
7620 return result;
7623 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7624 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7626 uint32x4_t result;
7627 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7628 : "=w"(result)
7629 : "0"(a), "w"(b), "w"(c)
7630 : /* No clobbers */);
7631 return result;
7634 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7635 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7637 uint64x2_t result;
7638 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7639 : "=w"(result)
7640 : "0"(a), "w"(b), "w"(c)
7641 : /* No clobbers */);
7642 return result;
/* vmlsl_lane / vmlsl_laneq: widening multiply-subtract using the low
   64-bit B operand and a selected lane of C (SMLSL/UMLSL by element).
   Macros so the lane index can be passed as an "i" asm operand.  */

#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7757 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7758 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7760 int32x4_t result;
7761 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7762 : "=w"(result)
7763 : "0"(a), "w"(b), "x"(c)
7764 : /* No clobbers */);
7765 return result;
7768 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7769 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7771 int64x2_t result;
7772 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7773 : "=w"(result)
7774 : "0"(a), "w"(b), "w"(c)
7775 : /* No clobbers */);
7776 return result;
7779 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7780 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7782 uint32x4_t result;
7783 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7784 : "=w"(result)
7785 : "0"(a), "w"(b), "x"(c)
7786 : /* No clobbers */);
7787 return result;
7790 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7791 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7793 uint64x2_t result;
7794 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7795 : "=w"(result)
7796 : "0"(a), "w"(b), "w"(c)
7797 : /* No clobbers */);
7798 return result;
7801 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7802 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7804 int16x8_t result;
7805 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7806 : "=w"(result)
7807 : "0"(a), "w"(b), "w"(c)
7808 : /* No clobbers */);
7809 return result;
7812 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7813 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7815 int32x4_t result;
7816 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7817 : "=w"(result)
7818 : "0"(a), "w"(b), "w"(c)
7819 : /* No clobbers */);
7820 return result;
7823 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7824 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7826 int64x2_t result;
7827 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7828 : "=w"(result)
7829 : "0"(a), "w"(b), "w"(c)
7830 : /* No clobbers */);
7831 return result;
7834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7835 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7837 uint16x8_t result;
7838 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7839 : "=w"(result)
7840 : "0"(a), "w"(b), "w"(c)
7841 : /* No clobbers */);
7842 return result;
7845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7846 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7848 uint32x4_t result;
7849 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7850 : "=w"(result)
7851 : "0"(a), "w"(b), "w"(c)
7852 : /* No clobbers */);
7853 return result;
7856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7857 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7859 uint64x2_t result;
7860 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7861 : "=w"(result)
7862 : "0"(a), "w"(b), "w"(c)
7863 : /* No clobbers */);
7864 return result;
7867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7868 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7870 float32x4_t result;
7871 float32x4_t t1;
7872 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7873 : "=w"(result), "=w"(t1)
7874 : "0"(a), "w"(b), "w"(c)
7875 : /* No clobbers */);
7876 return result;
7879 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7880 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7882 int16x8_t result;
7883 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7884 : "=w"(result)
7885 : "0"(a), "w"(b), "x"(c)
7886 : /* No clobbers */);
7887 return result;
7890 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7891 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7893 int32x4_t result;
7894 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7895 : "=w"(result)
7896 : "0"(a), "w"(b), "w"(c)
7897 : /* No clobbers */);
7898 return result;
7901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7902 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7904 uint16x8_t result;
7905 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7906 : "=w"(result)
7907 : "0"(a), "w"(b), "x"(c)
7908 : /* No clobbers */);
7909 return result;
7912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7913 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7915 uint32x4_t result;
7916 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7917 : "=w"(result)
7918 : "0"(a), "w"(b), "w"(c)
7919 : /* No clobbers */);
7920 return result;
7923 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7924 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7926 int8x16_t result;
7927 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7928 : "=w"(result)
7929 : "0"(a), "w"(b), "w"(c)
7930 : /* No clobbers */);
7931 return result;
7934 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7935 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7937 int16x8_t result;
7938 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7939 : "=w"(result)
7940 : "0"(a), "w"(b), "w"(c)
7941 : /* No clobbers */);
7942 return result;
7945 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7946 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7948 int32x4_t result;
7949 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7950 : "=w"(result)
7951 : "0"(a), "w"(b), "w"(c)
7952 : /* No clobbers */);
7953 return result;
7956 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7957 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7959 uint8x16_t result;
7960 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7961 : "=w"(result)
7962 : "0"(a), "w"(b), "w"(c)
7963 : /* No clobbers */);
7964 return result;
7967 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7968 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7970 uint16x8_t result;
7971 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7972 : "=w"(result)
7973 : "0"(a), "w"(b), "w"(c)
7974 : /* No clobbers */);
7975 return result;
7978 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7979 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7981 uint32x4_t result;
7982 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7983 : "=w"(result)
7984 : "0"(a), "w"(b), "w"(c)
7985 : /* No clobbers */);
7986 return result;
7989 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7990 vmovl_high_s8 (int8x16_t a)
7992 int16x8_t result;
7993 __asm__ ("sshll2 %0.8h,%1.16b,#0"
7994 : "=w"(result)
7995 : "w"(a)
7996 : /* No clobbers */);
7997 return result;
8000 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8001 vmovl_high_s16 (int16x8_t a)
8003 int32x4_t result;
8004 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8005 : "=w"(result)
8006 : "w"(a)
8007 : /* No clobbers */);
8008 return result;
8011 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8012 vmovl_high_s32 (int32x4_t a)
8014 int64x2_t result;
8015 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8016 : "=w"(result)
8017 : "w"(a)
8018 : /* No clobbers */);
8019 return result;
8022 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8023 vmovl_high_u8 (uint8x16_t a)
8025 uint16x8_t result;
8026 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8027 : "=w"(result)
8028 : "w"(a)
8029 : /* No clobbers */);
8030 return result;
8033 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8034 vmovl_high_u16 (uint16x8_t a)
8036 uint32x4_t result;
8037 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8038 : "=w"(result)
8039 : "w"(a)
8040 : /* No clobbers */);
8041 return result;
8044 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8045 vmovl_high_u32 (uint32x4_t a)
8047 uint64x2_t result;
8048 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8049 : "=w"(result)
8050 : "w"(a)
8051 : /* No clobbers */);
8052 return result;
8055 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8056 vmovl_s8 (int8x8_t a)
8058 int16x8_t result;
8059 __asm__ ("sshll %0.8h,%1.8b,#0"
8060 : "=w"(result)
8061 : "w"(a)
8062 : /* No clobbers */);
8063 return result;
8066 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8067 vmovl_s16 (int16x4_t a)
8069 int32x4_t result;
8070 __asm__ ("sshll %0.4s,%1.4h,#0"
8071 : "=w"(result)
8072 : "w"(a)
8073 : /* No clobbers */);
8074 return result;
8077 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8078 vmovl_s32 (int32x2_t a)
8080 int64x2_t result;
8081 __asm__ ("sshll %0.2d,%1.2s,#0"
8082 : "=w"(result)
8083 : "w"(a)
8084 : /* No clobbers */);
8085 return result;
8088 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8089 vmovl_u8 (uint8x8_t a)
8091 uint16x8_t result;
8092 __asm__ ("ushll %0.8h,%1.8b,#0"
8093 : "=w"(result)
8094 : "w"(a)
8095 : /* No clobbers */);
8096 return result;
8099 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8100 vmovl_u16 (uint16x4_t a)
8102 uint32x4_t result;
8103 __asm__ ("ushll %0.4s,%1.4h,#0"
8104 : "=w"(result)
8105 : "w"(a)
8106 : /* No clobbers */);
8107 return result;
8110 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8111 vmovl_u32 (uint32x2_t a)
8113 uint64x2_t result;
8114 __asm__ ("ushll %0.2d,%1.2s,#0"
8115 : "=w"(result)
8116 : "w"(a)
8117 : /* No clobbers */);
8118 return result;
8121 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8122 vmovn_high_s16 (int8x8_t a, int16x8_t b)
8124 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8125 __asm__ ("xtn2 %0.16b,%1.8h"
8126 : "+w"(result)
8127 : "w"(b)
8128 : /* No clobbers */);
8129 return result;
8132 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8133 vmovn_high_s32 (int16x4_t a, int32x4_t b)
8135 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8136 __asm__ ("xtn2 %0.8h,%1.4s"
8137 : "+w"(result)
8138 : "w"(b)
8139 : /* No clobbers */);
8140 return result;
8143 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8144 vmovn_high_s64 (int32x2_t a, int64x2_t b)
8146 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8147 __asm__ ("xtn2 %0.4s,%1.2d"
8148 : "+w"(result)
8149 : "w"(b)
8150 : /* No clobbers */);
8151 return result;
8154 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8155 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8157 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8158 __asm__ ("xtn2 %0.16b,%1.8h"
8159 : "+w"(result)
8160 : "w"(b)
8161 : /* No clobbers */);
8162 return result;
8165 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8166 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8168 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8169 __asm__ ("xtn2 %0.8h,%1.4s"
8170 : "+w"(result)
8171 : "w"(b)
8172 : /* No clobbers */);
8173 return result;
8176 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8177 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8179 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8180 __asm__ ("xtn2 %0.4s,%1.2d"
8181 : "+w"(result)
8182 : "w"(b)
8183 : /* No clobbers */);
8184 return result;
8187 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8188 vmovn_s16 (int16x8_t a)
8190 int8x8_t result;
8191 __asm__ ("xtn %0.8b,%1.8h"
8192 : "=w"(result)
8193 : "w"(a)
8194 : /* No clobbers */);
8195 return result;
8198 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8199 vmovn_s32 (int32x4_t a)
8201 int16x4_t result;
8202 __asm__ ("xtn %0.4h,%1.4s"
8203 : "=w"(result)
8204 : "w"(a)
8205 : /* No clobbers */);
8206 return result;
8209 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8210 vmovn_s64 (int64x2_t a)
8212 int32x2_t result;
8213 __asm__ ("xtn %0.2s,%1.2d"
8214 : "=w"(result)
8215 : "w"(a)
8216 : /* No clobbers */);
8217 return result;
8220 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8221 vmovn_u16 (uint16x8_t a)
8223 uint8x8_t result;
8224 __asm__ ("xtn %0.8b,%1.8h"
8225 : "=w"(result)
8226 : "w"(a)
8227 : /* No clobbers */);
8228 return result;
8231 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8232 vmovn_u32 (uint32x4_t a)
8234 uint16x4_t result;
8235 __asm__ ("xtn %0.4h,%1.4s"
8236 : "=w"(result)
8237 : "w"(a)
8238 : /* No clobbers */);
8239 return result;
8242 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8243 vmovn_u64 (uint64x2_t a)
8245 uint32x2_t result;
8246 __asm__ ("xtn %0.2s,%1.2d"
8247 : "=w"(result)
8248 : "w"(a)
8249 : /* No clobbers */);
8250 return result;
8253 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8254 vmul_n_f32 (float32x2_t a, float32_t b)
8256 float32x2_t result;
8257 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
8258 : "=w"(result)
8259 : "w"(a), "w"(b)
8260 : /* No clobbers */);
8261 return result;
8264 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8265 vmul_n_s16 (int16x4_t a, int16_t b)
8267 int16x4_t result;
8268 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8269 : "=w"(result)
8270 : "w"(a), "x"(b)
8271 : /* No clobbers */);
8272 return result;
8275 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8276 vmul_n_s32 (int32x2_t a, int32_t b)
8278 int32x2_t result;
8279 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8280 : "=w"(result)
8281 : "w"(a), "w"(b)
8282 : /* No clobbers */);
8283 return result;
8286 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8287 vmul_n_u16 (uint16x4_t a, uint16_t b)
8289 uint16x4_t result;
8290 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8291 : "=w"(result)
8292 : "w"(a), "x"(b)
8293 : /* No clobbers */);
8294 return result;
8297 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8298 vmul_n_u32 (uint32x2_t a, uint32_t b)
8300 uint32x2_t result;
8301 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8302 : "=w"(result)
8303 : "w"(a), "w"(b)
8304 : /* No clobbers */);
8305 return result;
/* vmull_high_lane: widening multiply of the high half of A by a
   selected lane of B (SMULL2/UMULL2 by element).  Per ACLE, the lane
   operand of the "lane" (as opposed to "laneq") variants is a 64-bit
   vector — int16x4_t/int32x2_t etc.; the previous declarations used
   the 128-bit types, which matched the laneq variants instead.  */

#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_laneq: widening multiply of the high half of A by a
   selected lane of a 128-bit B (SMULL2/UMULL2 by element).  */

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8412 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8413 vmull_high_n_s16 (int16x8_t a, int16_t b)
8415 int32x4_t result;
8416 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
8417 : "=w"(result)
8418 : "w"(a), "x"(b)
8419 : /* No clobbers */);
8420 return result;
8423 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8424 vmull_high_n_s32 (int32x4_t a, int32_t b)
8426 int64x2_t result;
8427 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
8428 : "=w"(result)
8429 : "w"(a), "w"(b)
8430 : /* No clobbers */);
8431 return result;
8434 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8435 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
8437 uint32x4_t result;
8438 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
8439 : "=w"(result)
8440 : "w"(a), "x"(b)
8441 : /* No clobbers */);
8442 return result;
8445 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8446 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
8448 uint64x2_t result;
8449 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
8450 : "=w"(result)
8451 : "w"(a), "w"(b)
8452 : /* No clobbers */);
8453 return result;
8456 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8457 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
8459 poly16x8_t result;
8460 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
8461 : "=w"(result)
8462 : "w"(a), "w"(b)
8463 : /* No clobbers */);
8464 return result;
8467 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8468 vmull_high_s8 (int8x16_t a, int8x16_t b)
8470 int16x8_t result;
8471 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
8472 : "=w"(result)
8473 : "w"(a), "w"(b)
8474 : /* No clobbers */);
8475 return result;
8478 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8479 vmull_high_s16 (int16x8_t a, int16x8_t b)
8481 int32x4_t result;
8482 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
8483 : "=w"(result)
8484 : "w"(a), "w"(b)
8485 : /* No clobbers */);
8486 return result;
8489 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8490 vmull_high_s32 (int32x4_t a, int32x4_t b)
8492 int64x2_t result;
8493 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
8494 : "=w"(result)
8495 : "w"(a), "w"(b)
8496 : /* No clobbers */);
8497 return result;
8500 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8501 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
8503 uint16x8_t result;
8504 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
8505 : "=w"(result)
8506 : "w"(a), "w"(b)
8507 : /* No clobbers */);
8508 return result;
8511 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8512 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
8514 uint32x4_t result;
8515 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
8516 : "=w"(result)
8517 : "w"(a), "w"(b)
8518 : /* No clobbers */);
8519 return result;
8522 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8523 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
8525 uint64x2_t result;
8526 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
8527 : "=w"(result)
8528 : "w"(a), "w"(b)
8529 : /* No clobbers */);
8530 return result;
/* vmull_lane: widening multiply of a 64-bit A by a selected lane of a
   64-bit B (SMULL/UMULL by element).  Macros so the lane index can be
   an "i" asm operand.  */

#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8585 #define vmull_laneq_s16(a, b, c) \
8586 __extension__ \
8587 ({ \
8588 int16x8_t b_ = (b); \
8589 int16x4_t a_ = (a); \
8590 int32x4_t result; \
8591 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
8592 : "=w"(result) \
8593 : "w"(a_), "x"(b_), "i"(c) \
8594 : /* No clobbers */); \
8595 result; \
8598 #define vmull_laneq_s32(a, b, c) \
8599 __extension__ \
8600 ({ \
8601 int32x4_t b_ = (b); \
8602 int32x2_t a_ = (a); \
8603 int64x2_t result; \
8604 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
8605 : "=w"(result) \
8606 : "w"(a_), "w"(b_), "i"(c) \
8607 : /* No clobbers */); \
8608 result; \
8611 #define vmull_laneq_u16(a, b, c) \
8612 __extension__ \
8613 ({ \
8614 uint16x8_t b_ = (b); \
8615 uint16x4_t a_ = (a); \
8616 uint32x4_t result; \
8617 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
8618 : "=w"(result) \
8619 : "w"(a_), "x"(b_), "i"(c) \
8620 : /* No clobbers */); \
8621 result; \
8624 #define vmull_laneq_u32(a, b, c) \
8625 __extension__ \
8626 ({ \
8627 uint32x4_t b_ = (b); \
8628 uint32x2_t a_ = (a); \
8629 uint64x2_t result; \
8630 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8631 : "=w"(result) \
8632 : "w"(a_), "w"(b_), "i"(c) \
8633 : /* No clobbers */); \
8634 result; \
8637 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8638 vmull_n_s16 (int16x4_t a, int16_t b)
8640 int32x4_t result;
8641 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
8642 : "=w"(result)
8643 : "w"(a), "x"(b)
8644 : /* No clobbers */);
8645 return result;
8648 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8649 vmull_n_s32 (int32x2_t a, int32_t b)
8651 int64x2_t result;
8652 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
8653 : "=w"(result)
8654 : "w"(a), "w"(b)
8655 : /* No clobbers */);
8656 return result;
8659 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8660 vmull_n_u16 (uint16x4_t a, uint16_t b)
8662 uint32x4_t result;
8663 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
8664 : "=w"(result)
8665 : "w"(a), "x"(b)
8666 : /* No clobbers */);
8667 return result;
8670 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8671 vmull_n_u32 (uint32x2_t a, uint32_t b)
8673 uint64x2_t result;
8674 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8675 : "=w"(result)
8676 : "w"(a), "w"(b)
8677 : /* No clobbers */);
8678 return result;
8681 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8682 vmull_p8 (poly8x8_t a, poly8x8_t b)
8684 poly16x8_t result;
8685 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8686 : "=w"(result)
8687 : "w"(a), "w"(b)
8688 : /* No clobbers */);
8689 return result;
8692 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8693 vmull_s8 (int8x8_t a, int8x8_t b)
8695 int16x8_t result;
8696 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8697 : "=w"(result)
8698 : "w"(a), "w"(b)
8699 : /* No clobbers */);
8700 return result;
8703 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8704 vmull_s16 (int16x4_t a, int16x4_t b)
8706 int32x4_t result;
8707 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8708 : "=w"(result)
8709 : "w"(a), "w"(b)
8710 : /* No clobbers */);
8711 return result;
8714 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8715 vmull_s32 (int32x2_t a, int32x2_t b)
8717 int64x2_t result;
8718 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8719 : "=w"(result)
8720 : "w"(a), "w"(b)
8721 : /* No clobbers */);
8722 return result;
8725 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8726 vmull_u8 (uint8x8_t a, uint8x8_t b)
8728 uint16x8_t result;
8729 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8730 : "=w"(result)
8731 : "w"(a), "w"(b)
8732 : /* No clobbers */);
8733 return result;
8736 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8737 vmull_u16 (uint16x4_t a, uint16x4_t b)
8739 uint32x4_t result;
8740 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8741 : "=w"(result)
8742 : "w"(a), "w"(b)
8743 : /* No clobbers */);
8744 return result;
8747 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8748 vmull_u32 (uint32x2_t a, uint32x2_t b)
8750 uint64x2_t result;
8751 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8752 : "=w"(result)
8753 : "w"(a), "w"(b)
8754 : /* No clobbers */);
8755 return result;
8758 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8759 vmulq_n_f32 (float32x4_t a, float32_t b)
8761 float32x4_t result;
8762 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8763 : "=w"(result)
8764 : "w"(a), "w"(b)
8765 : /* No clobbers */);
8766 return result;
8769 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8770 vmulq_n_f64 (float64x2_t a, float64_t b)
8772 float64x2_t result;
8773 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8774 : "=w"(result)
8775 : "w"(a), "w"(b)
8776 : /* No clobbers */);
8777 return result;
8780 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8781 vmulq_n_s16 (int16x8_t a, int16_t b)
8783 int16x8_t result;
8784 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8785 : "=w"(result)
8786 : "w"(a), "x"(b)
8787 : /* No clobbers */);
8788 return result;
8791 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8792 vmulq_n_s32 (int32x4_t a, int32_t b)
8794 int32x4_t result;
8795 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8796 : "=w"(result)
8797 : "w"(a), "w"(b)
8798 : /* No clobbers */);
8799 return result;
8802 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8803 vmulq_n_u16 (uint16x8_t a, uint16_t b)
8805 uint16x8_t result;
8806 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8807 : "=w"(result)
8808 : "w"(a), "x"(b)
8809 : /* No clobbers */);
8810 return result;
8813 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8814 vmulq_n_u32 (uint32x4_t a, uint32_t b)
8816 uint32x4_t result;
8817 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8818 : "=w"(result)
8819 : "w"(a), "w"(b)
8820 : /* No clobbers */);
8821 return result;
8824 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8825 vmulx_f32 (float32x2_t a, float32x2_t b)
8827 float32x2_t result;
8828 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
8829 : "=w"(result)
8830 : "w"(a), "w"(b)
8831 : /* No clobbers */);
8832 return result;
8835 #define vmulx_lane_f32(a, b, c) \
8836 __extension__ \
8837 ({ \
8838 float32x4_t b_ = (b); \
8839 float32x2_t a_ = (a); \
8840 float32x2_t result; \
8841 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
8842 : "=w"(result) \
8843 : "w"(a_), "w"(b_), "i"(c) \
8844 : /* No clobbers */); \
8845 result; \
8848 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
8849 vmulxd_f64 (float64_t a, float64_t b)
8851 float64_t result;
8852 __asm__ ("fmulx %d0, %d1, %d2"
8853 : "=w"(result)
8854 : "w"(a), "w"(b)
8855 : /* No clobbers */);
8856 return result;
8859 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8860 vmulxq_f32 (float32x4_t a, float32x4_t b)
8862 float32x4_t result;
8863 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
8864 : "=w"(result)
8865 : "w"(a), "w"(b)
8866 : /* No clobbers */);
8867 return result;
8870 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8871 vmulxq_f64 (float64x2_t a, float64x2_t b)
8873 float64x2_t result;
8874 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
8875 : "=w"(result)
8876 : "w"(a), "w"(b)
8877 : /* No clobbers */);
8878 return result;
8881 #define vmulxq_lane_f32(a, b, c) \
8882 __extension__ \
8883 ({ \
8884 float32x4_t b_ = (b); \
8885 float32x4_t a_ = (a); \
8886 float32x4_t result; \
8887 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
8888 : "=w"(result) \
8889 : "w"(a_), "w"(b_), "i"(c) \
8890 : /* No clobbers */); \
8891 result; \
8894 #define vmulxq_lane_f64(a, b, c) \
8895 __extension__ \
8896 ({ \
8897 float64x2_t b_ = (b); \
8898 float64x2_t a_ = (a); \
8899 float64x2_t result; \
8900 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
8901 : "=w"(result) \
8902 : "w"(a_), "w"(b_), "i"(c) \
8903 : /* No clobbers */); \
8904 result; \
8907 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
8908 vmulxs_f32 (float32_t a, float32_t b)
8910 float32_t result;
8911 __asm__ ("fmulx %s0, %s1, %s2"
8912 : "=w"(result)
8913 : "w"(a), "w"(b)
8914 : /* No clobbers */);
8915 return result;
8918 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8919 vmvn_p8 (poly8x8_t a)
8921 poly8x8_t result;
8922 __asm__ ("mvn %0.8b,%1.8b"
8923 : "=w"(result)
8924 : "w"(a)
8925 : /* No clobbers */);
8926 return result;
8929 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8930 vmvn_s8 (int8x8_t a)
8932 int8x8_t result;
8933 __asm__ ("mvn %0.8b,%1.8b"
8934 : "=w"(result)
8935 : "w"(a)
8936 : /* No clobbers */);
8937 return result;
8940 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8941 vmvn_s16 (int16x4_t a)
8943 int16x4_t result;
8944 __asm__ ("mvn %0.8b,%1.8b"
8945 : "=w"(result)
8946 : "w"(a)
8947 : /* No clobbers */);
8948 return result;
8951 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8952 vmvn_s32 (int32x2_t a)
8954 int32x2_t result;
8955 __asm__ ("mvn %0.8b,%1.8b"
8956 : "=w"(result)
8957 : "w"(a)
8958 : /* No clobbers */);
8959 return result;
8962 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8963 vmvn_u8 (uint8x8_t a)
8965 uint8x8_t result;
8966 __asm__ ("mvn %0.8b,%1.8b"
8967 : "=w"(result)
8968 : "w"(a)
8969 : /* No clobbers */);
8970 return result;
8973 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8974 vmvn_u16 (uint16x4_t a)
8976 uint16x4_t result;
8977 __asm__ ("mvn %0.8b,%1.8b"
8978 : "=w"(result)
8979 : "w"(a)
8980 : /* No clobbers */);
8981 return result;
8984 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8985 vmvn_u32 (uint32x2_t a)
8987 uint32x2_t result;
8988 __asm__ ("mvn %0.8b,%1.8b"
8989 : "=w"(result)
8990 : "w"(a)
8991 : /* No clobbers */);
8992 return result;
8995 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
8996 vmvnq_p8 (poly8x16_t a)
8998 poly8x16_t result;
8999 __asm__ ("mvn %0.16b,%1.16b"
9000 : "=w"(result)
9001 : "w"(a)
9002 : /* No clobbers */);
9003 return result;
9006 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9007 vmvnq_s8 (int8x16_t a)
9009 int8x16_t result;
9010 __asm__ ("mvn %0.16b,%1.16b"
9011 : "=w"(result)
9012 : "w"(a)
9013 : /* No clobbers */);
9014 return result;
9017 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9018 vmvnq_s16 (int16x8_t a)
9020 int16x8_t result;
9021 __asm__ ("mvn %0.16b,%1.16b"
9022 : "=w"(result)
9023 : "w"(a)
9024 : /* No clobbers */);
9025 return result;
9028 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9029 vmvnq_s32 (int32x4_t a)
9031 int32x4_t result;
9032 __asm__ ("mvn %0.16b,%1.16b"
9033 : "=w"(result)
9034 : "w"(a)
9035 : /* No clobbers */);
9036 return result;
9039 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9040 vmvnq_u8 (uint8x16_t a)
9042 uint8x16_t result;
9043 __asm__ ("mvn %0.16b,%1.16b"
9044 : "=w"(result)
9045 : "w"(a)
9046 : /* No clobbers */);
9047 return result;
9050 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9051 vmvnq_u16 (uint16x8_t a)
9053 uint16x8_t result;
9054 __asm__ ("mvn %0.16b,%1.16b"
9055 : "=w"(result)
9056 : "w"(a)
9057 : /* No clobbers */);
9058 return result;
9061 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9062 vmvnq_u32 (uint32x4_t a)
9064 uint32x4_t result;
9065 __asm__ ("mvn %0.16b,%1.16b"
9066 : "=w"(result)
9067 : "w"(a)
9068 : /* No clobbers */);
9069 return result;
9073 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9074 vpadal_s8 (int16x4_t a, int8x8_t b)
9076 int16x4_t result;
9077 __asm__ ("sadalp %0.4h,%2.8b"
9078 : "=w"(result)
9079 : "0"(a), "w"(b)
9080 : /* No clobbers */);
9081 return result;
9084 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9085 vpadal_s16 (int32x2_t a, int16x4_t b)
9087 int32x2_t result;
9088 __asm__ ("sadalp %0.2s,%2.4h"
9089 : "=w"(result)
9090 : "0"(a), "w"(b)
9091 : /* No clobbers */);
9092 return result;
9095 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9096 vpadal_s32 (int64x1_t a, int32x2_t b)
9098 int64x1_t result;
9099 __asm__ ("sadalp %0.1d,%2.2s"
9100 : "=w"(result)
9101 : "0"(a), "w"(b)
9102 : /* No clobbers */);
9103 return result;
9106 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9107 vpadal_u8 (uint16x4_t a, uint8x8_t b)
9109 uint16x4_t result;
9110 __asm__ ("uadalp %0.4h,%2.8b"
9111 : "=w"(result)
9112 : "0"(a), "w"(b)
9113 : /* No clobbers */);
9114 return result;
9117 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9118 vpadal_u16 (uint32x2_t a, uint16x4_t b)
9120 uint32x2_t result;
9121 __asm__ ("uadalp %0.2s,%2.4h"
9122 : "=w"(result)
9123 : "0"(a), "w"(b)
9124 : /* No clobbers */);
9125 return result;
9128 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9129 vpadal_u32 (uint64x1_t a, uint32x2_t b)
9131 uint64x1_t result;
9132 __asm__ ("uadalp %0.1d,%2.2s"
9133 : "=w"(result)
9134 : "0"(a), "w"(b)
9135 : /* No clobbers */);
9136 return result;
9139 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9140 vpadalq_s8 (int16x8_t a, int8x16_t b)
9142 int16x8_t result;
9143 __asm__ ("sadalp %0.8h,%2.16b"
9144 : "=w"(result)
9145 : "0"(a), "w"(b)
9146 : /* No clobbers */);
9147 return result;
9150 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9151 vpadalq_s16 (int32x4_t a, int16x8_t b)
9153 int32x4_t result;
9154 __asm__ ("sadalp %0.4s,%2.8h"
9155 : "=w"(result)
9156 : "0"(a), "w"(b)
9157 : /* No clobbers */);
9158 return result;
9161 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9162 vpadalq_s32 (int64x2_t a, int32x4_t b)
9164 int64x2_t result;
9165 __asm__ ("sadalp %0.2d,%2.4s"
9166 : "=w"(result)
9167 : "0"(a), "w"(b)
9168 : /* No clobbers */);
9169 return result;
9172 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9173 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
9175 uint16x8_t result;
9176 __asm__ ("uadalp %0.8h,%2.16b"
9177 : "=w"(result)
9178 : "0"(a), "w"(b)
9179 : /* No clobbers */);
9180 return result;
9183 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9184 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
9186 uint32x4_t result;
9187 __asm__ ("uadalp %0.4s,%2.8h"
9188 : "=w"(result)
9189 : "0"(a), "w"(b)
9190 : /* No clobbers */);
9191 return result;
9194 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9195 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
9197 uint64x2_t result;
9198 __asm__ ("uadalp %0.2d,%2.4s"
9199 : "=w"(result)
9200 : "0"(a), "w"(b)
9201 : /* No clobbers */);
9202 return result;
9205 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9206 vpadd_f32 (float32x2_t a, float32x2_t b)
9208 float32x2_t result;
9209 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
9210 : "=w"(result)
9211 : "w"(a), "w"(b)
9212 : /* No clobbers */);
9213 return result;
9216 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9217 vpaddl_s8 (int8x8_t a)
9219 int16x4_t result;
9220 __asm__ ("saddlp %0.4h,%1.8b"
9221 : "=w"(result)
9222 : "w"(a)
9223 : /* No clobbers */);
9224 return result;
9227 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9228 vpaddl_s16 (int16x4_t a)
9230 int32x2_t result;
9231 __asm__ ("saddlp %0.2s,%1.4h"
9232 : "=w"(result)
9233 : "w"(a)
9234 : /* No clobbers */);
9235 return result;
9238 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9239 vpaddl_s32 (int32x2_t a)
9241 int64x1_t result;
9242 __asm__ ("saddlp %0.1d,%1.2s"
9243 : "=w"(result)
9244 : "w"(a)
9245 : /* No clobbers */);
9246 return result;
9249 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9250 vpaddl_u8 (uint8x8_t a)
9252 uint16x4_t result;
9253 __asm__ ("uaddlp %0.4h,%1.8b"
9254 : "=w"(result)
9255 : "w"(a)
9256 : /* No clobbers */);
9257 return result;
9260 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9261 vpaddl_u16 (uint16x4_t a)
9263 uint32x2_t result;
9264 __asm__ ("uaddlp %0.2s,%1.4h"
9265 : "=w"(result)
9266 : "w"(a)
9267 : /* No clobbers */);
9268 return result;
9271 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9272 vpaddl_u32 (uint32x2_t a)
9274 uint64x1_t result;
9275 __asm__ ("uaddlp %0.1d,%1.2s"
9276 : "=w"(result)
9277 : "w"(a)
9278 : /* No clobbers */);
9279 return result;
9282 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9283 vpaddlq_s8 (int8x16_t a)
9285 int16x8_t result;
9286 __asm__ ("saddlp %0.8h,%1.16b"
9287 : "=w"(result)
9288 : "w"(a)
9289 : /* No clobbers */);
9290 return result;
9293 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9294 vpaddlq_s16 (int16x8_t a)
9296 int32x4_t result;
9297 __asm__ ("saddlp %0.4s,%1.8h"
9298 : "=w"(result)
9299 : "w"(a)
9300 : /* No clobbers */);
9301 return result;
9304 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9305 vpaddlq_s32 (int32x4_t a)
9307 int64x2_t result;
9308 __asm__ ("saddlp %0.2d,%1.4s"
9309 : "=w"(result)
9310 : "w"(a)
9311 : /* No clobbers */);
9312 return result;
9315 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9316 vpaddlq_u8 (uint8x16_t a)
9318 uint16x8_t result;
9319 __asm__ ("uaddlp %0.8h,%1.16b"
9320 : "=w"(result)
9321 : "w"(a)
9322 : /* No clobbers */);
9323 return result;
9326 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9327 vpaddlq_u16 (uint16x8_t a)
9329 uint32x4_t result;
9330 __asm__ ("uaddlp %0.4s,%1.8h"
9331 : "=w"(result)
9332 : "w"(a)
9333 : /* No clobbers */);
9334 return result;
9337 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9338 vpaddlq_u32 (uint32x4_t a)
9340 uint64x2_t result;
9341 __asm__ ("uaddlp %0.2d,%1.4s"
9342 : "=w"(result)
9343 : "w"(a)
9344 : /* No clobbers */);
9345 return result;
9348 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9349 vpaddq_f32 (float32x4_t a, float32x4_t b)
9351 float32x4_t result;
9352 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
9353 : "=w"(result)
9354 : "w"(a), "w"(b)
9355 : /* No clobbers */);
9356 return result;
9359 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9360 vpaddq_f64 (float64x2_t a, float64x2_t b)
9362 float64x2_t result;
9363 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
9364 : "=w"(result)
9365 : "w"(a), "w"(b)
9366 : /* No clobbers */);
9367 return result;
9370 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9371 vpaddq_s8 (int8x16_t a, int8x16_t b)
9373 int8x16_t result;
9374 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9375 : "=w"(result)
9376 : "w"(a), "w"(b)
9377 : /* No clobbers */);
9378 return result;
9381 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9382 vpaddq_s16 (int16x8_t a, int16x8_t b)
9384 int16x8_t result;
9385 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9386 : "=w"(result)
9387 : "w"(a), "w"(b)
9388 : /* No clobbers */);
9389 return result;
9392 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9393 vpaddq_s32 (int32x4_t a, int32x4_t b)
9395 int32x4_t result;
9396 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9397 : "=w"(result)
9398 : "w"(a), "w"(b)
9399 : /* No clobbers */);
9400 return result;
9403 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9404 vpaddq_s64 (int64x2_t a, int64x2_t b)
9406 int64x2_t result;
9407 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9408 : "=w"(result)
9409 : "w"(a), "w"(b)
9410 : /* No clobbers */);
9411 return result;
9414 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9415 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
9417 uint8x16_t result;
9418 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9419 : "=w"(result)
9420 : "w"(a), "w"(b)
9421 : /* No clobbers */);
9422 return result;
9425 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9426 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
9428 uint16x8_t result;
9429 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9430 : "=w"(result)
9431 : "w"(a), "w"(b)
9432 : /* No clobbers */);
9433 return result;
9436 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9437 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
9439 uint32x4_t result;
9440 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9441 : "=w"(result)
9442 : "w"(a), "w"(b)
9443 : /* No clobbers */);
9444 return result;
9447 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9448 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9450 uint64x2_t result;
9451 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9452 : "=w"(result)
9453 : "w"(a), "w"(b)
9454 : /* No clobbers */);
9455 return result;
9458 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9459 vpadds_f32 (float32x2_t a)
9461 float32_t result;
9462 __asm__ ("faddp %s0,%1.2s"
9463 : "=w"(result)
9464 : "w"(a)
9465 : /* No clobbers */);
9466 return result;
9469 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9470 vpmax_f32 (float32x2_t a, float32x2_t b)
9472 float32x2_t result;
9473 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
9474 : "=w"(result)
9475 : "w"(a), "w"(b)
9476 : /* No clobbers */);
9477 return result;
9480 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9481 vpmax_s8 (int8x8_t a, int8x8_t b)
9483 int8x8_t result;
9484 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
9485 : "=w"(result)
9486 : "w"(a), "w"(b)
9487 : /* No clobbers */);
9488 return result;
9491 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9492 vpmax_s16 (int16x4_t a, int16x4_t b)
9494 int16x4_t result;
9495 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
9496 : "=w"(result)
9497 : "w"(a), "w"(b)
9498 : /* No clobbers */);
9499 return result;
9502 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9503 vpmax_s32 (int32x2_t a, int32x2_t b)
9505 int32x2_t result;
9506 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
9507 : "=w"(result)
9508 : "w"(a), "w"(b)
9509 : /* No clobbers */);
9510 return result;
9513 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9514 vpmax_u8 (uint8x8_t a, uint8x8_t b)
9516 uint8x8_t result;
9517 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
9518 : "=w"(result)
9519 : "w"(a), "w"(b)
9520 : /* No clobbers */);
9521 return result;
9524 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9525 vpmax_u16 (uint16x4_t a, uint16x4_t b)
9527 uint16x4_t result;
9528 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
9529 : "=w"(result)
9530 : "w"(a), "w"(b)
9531 : /* No clobbers */);
9532 return result;
9535 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9536 vpmax_u32 (uint32x2_t a, uint32x2_t b)
9538 uint32x2_t result;
9539 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
9540 : "=w"(result)
9541 : "w"(a), "w"(b)
9542 : /* No clobbers */);
9543 return result;
9546 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9547 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
9549 float32x2_t result;
9550 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
9551 : "=w"(result)
9552 : "w"(a), "w"(b)
9553 : /* No clobbers */);
9554 return result;
9557 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9558 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
9560 float32x4_t result;
9561 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
9562 : "=w"(result)
9563 : "w"(a), "w"(b)
9564 : /* No clobbers */);
9565 return result;
9568 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9569 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
9571 float64x2_t result;
9572 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
9573 : "=w"(result)
9574 : "w"(a), "w"(b)
9575 : /* No clobbers */);
9576 return result;
9579 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9580 vpmaxnmqd_f64 (float64x2_t a)
9582 float64_t result;
9583 __asm__ ("fmaxnmp %d0,%1.2d"
9584 : "=w"(result)
9585 : "w"(a)
9586 : /* No clobbers */);
9587 return result;
9590 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9591 vpmaxnms_f32 (float32x2_t a)
9593 float32_t result;
9594 __asm__ ("fmaxnmp %s0,%1.2s"
9595 : "=w"(result)
9596 : "w"(a)
9597 : /* No clobbers */);
9598 return result;
9601 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9602 vpmaxq_f32 (float32x4_t a, float32x4_t b)
9604 float32x4_t result;
9605 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
9606 : "=w"(result)
9607 : "w"(a), "w"(b)
9608 : /* No clobbers */);
9609 return result;
9612 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9613 vpmaxq_f64 (float64x2_t a, float64x2_t b)
9615 float64x2_t result;
9616 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
9617 : "=w"(result)
9618 : "w"(a), "w"(b)
9619 : /* No clobbers */);
9620 return result;
9623 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9624 vpmaxq_s8 (int8x16_t a, int8x16_t b)
9626 int8x16_t result;
9627 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
9628 : "=w"(result)
9629 : "w"(a), "w"(b)
9630 : /* No clobbers */);
9631 return result;
9634 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9635 vpmaxq_s16 (int16x8_t a, int16x8_t b)
9637 int16x8_t result;
9638 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
9639 : "=w"(result)
9640 : "w"(a), "w"(b)
9641 : /* No clobbers */);
9642 return result;
9645 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9646 vpmaxq_s32 (int32x4_t a, int32x4_t b)
9648 int32x4_t result;
9649 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
9650 : "=w"(result)
9651 : "w"(a), "w"(b)
9652 : /* No clobbers */);
9653 return result;
9656 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9657 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
9659 uint8x16_t result;
9660 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
9661 : "=w"(result)
9662 : "w"(a), "w"(b)
9663 : /* No clobbers */);
9664 return result;
9667 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9668 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
9670 uint16x8_t result;
9671 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
9672 : "=w"(result)
9673 : "w"(a), "w"(b)
9674 : /* No clobbers */);
9675 return result;
9678 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9679 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
9681 uint32x4_t result;
9682 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
9683 : "=w"(result)
9684 : "w"(a), "w"(b)
9685 : /* No clobbers */);
9686 return result;
9689 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9690 vpmaxqd_f64 (float64x2_t a)
9692 float64_t result;
9693 __asm__ ("fmaxp %d0,%1.2d"
9694 : "=w"(result)
9695 : "w"(a)
9696 : /* No clobbers */);
9697 return result;
9700 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9701 vpmaxs_f32 (float32x2_t a)
9703 float32_t result;
9704 __asm__ ("fmaxp %s0,%1.2s"
9705 : "=w"(result)
9706 : "w"(a)
9707 : /* No clobbers */);
9708 return result;
9711 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9712 vpmin_f32 (float32x2_t a, float32x2_t b)
9714 float32x2_t result;
9715 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
9716 : "=w"(result)
9717 : "w"(a), "w"(b)
9718 : /* No clobbers */);
9719 return result;
9722 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9723 vpmin_s8 (int8x8_t a, int8x8_t b)
9725 int8x8_t result;
9726 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
9727 : "=w"(result)
9728 : "w"(a), "w"(b)
9729 : /* No clobbers */);
9730 return result;
9733 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9734 vpmin_s16 (int16x4_t a, int16x4_t b)
9736 int16x4_t result;
9737 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
9738 : "=w"(result)
9739 : "w"(a), "w"(b)
9740 : /* No clobbers */);
9741 return result;
9744 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9745 vpmin_s32 (int32x2_t a, int32x2_t b)
9747 int32x2_t result;
9748 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
9749 : "=w"(result)
9750 : "w"(a), "w"(b)
9751 : /* No clobbers */);
9752 return result;
9755 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9756 vpmin_u8 (uint8x8_t a, uint8x8_t b)
9758 uint8x8_t result;
9759 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
9760 : "=w"(result)
9761 : "w"(a), "w"(b)
9762 : /* No clobbers */);
9763 return result;
9766 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9767 vpmin_u16 (uint16x4_t a, uint16x4_t b)
9769 uint16x4_t result;
9770 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
9771 : "=w"(result)
9772 : "w"(a), "w"(b)
9773 : /* No clobbers */);
9774 return result;
/* Pairwise-minimum intrinsics, implemented via inline asm until proper
   builtins exist.  uminp/sminp/fminp take each adjacent pair of lanes from
   the concatenation of the two source vectors and write the smaller of the
   pair; fminnmp is the variant with IEEE 754-2008 minNum semantics
   (a quiet-NaN input loses to a numeric input).  The *qd_f64/*s_f32 forms
   reduce the two lanes of a single vector to one scalar.  */
9777 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9778 vpmin_u32 (uint32x2_t a, uint32x2_t b)
9780 uint32x2_t result;
9781 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
9782 : "=w"(result)
9783 : "w"(a), "w"(b)
9784 : /* No clobbers */);
9785 return result;
/* FP pairwise minimum with minNum (NaN-suppressing) semantics.  */
9788 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9789 vpminnm_f32 (float32x2_t a, float32x2_t b)
9791 float32x2_t result;
9792 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
9793 : "=w"(result)
9794 : "w"(a), "w"(b)
9795 : /* No clobbers */);
9796 return result;
9799 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9800 vpminnmq_f32 (float32x4_t a, float32x4_t b)
9802 float32x4_t result;
9803 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
9804 : "=w"(result)
9805 : "w"(a), "w"(b)
9806 : /* No clobbers */);
9807 return result;
9810 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9811 vpminnmq_f64 (float64x2_t a, float64x2_t b)
9813 float64x2_t result;
9814 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
9815 : "=w"(result)
9816 : "w"(a), "w"(b)
9817 : /* No clobbers */);
9818 return result;
/* Scalar reduction: minNum of the two lanes of one vector.  */
9821 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9822 vpminnmqd_f64 (float64x2_t a)
9824 float64_t result;
9825 __asm__ ("fminnmp %d0,%1.2d"
9826 : "=w"(result)
9827 : "w"(a)
9828 : /* No clobbers */);
9829 return result;
9832 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9833 vpminnms_f32 (float32x2_t a)
9835 float32_t result;
9836 __asm__ ("fminnmp %s0,%1.2s"
9837 : "=w"(result)
9838 : "w"(a)
9839 : /* No clobbers */);
9840 return result;
/* Full-width (128-bit) pairwise minimum.  */
9843 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9844 vpminq_f32 (float32x4_t a, float32x4_t b)
9846 float32x4_t result;
9847 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
9848 : "=w"(result)
9849 : "w"(a), "w"(b)
9850 : /* No clobbers */);
9851 return result;
9854 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9855 vpminq_f64 (float64x2_t a, float64x2_t b)
9857 float64x2_t result;
9858 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
9859 : "=w"(result)
9860 : "w"(a), "w"(b)
9861 : /* No clobbers */);
9862 return result;
9865 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9866 vpminq_s8 (int8x16_t a, int8x16_t b)
9868 int8x16_t result;
9869 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
9870 : "=w"(result)
9871 : "w"(a), "w"(b)
9872 : /* No clobbers */);
9873 return result;
9876 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9877 vpminq_s16 (int16x8_t a, int16x8_t b)
9879 int16x8_t result;
9880 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
9881 : "=w"(result)
9882 : "w"(a), "w"(b)
9883 : /* No clobbers */);
9884 return result;
9887 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9888 vpminq_s32 (int32x4_t a, int32x4_t b)
9890 int32x4_t result;
9891 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
9892 : "=w"(result)
9893 : "w"(a), "w"(b)
9894 : /* No clobbers */);
9895 return result;
9898 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9899 vpminq_u8 (uint8x16_t a, uint8x16_t b)
9901 uint8x16_t result;
9902 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
9903 : "=w"(result)
9904 : "w"(a), "w"(b)
9905 : /* No clobbers */);
9906 return result;
9909 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9910 vpminq_u16 (uint16x8_t a, uint16x8_t b)
9912 uint16x8_t result;
9913 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
9914 : "=w"(result)
9915 : "w"(a), "w"(b)
9916 : /* No clobbers */);
9917 return result;
9920 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9921 vpminq_u32 (uint32x4_t a, uint32x4_t b)
9923 uint32x4_t result;
9924 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
9925 : "=w"(result)
9926 : "w"(a), "w"(b)
9927 : /* No clobbers */);
9928 return result;
/* Scalar reductions using the plain (non-minNum) fminp.  */
9931 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9932 vpminqd_f64 (float64x2_t a)
9934 float64_t result;
9935 __asm__ ("fminp %d0,%1.2d"
9936 : "=w"(result)
9937 : "w"(a)
9938 : /* No clobbers */);
9939 return result;
9942 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9943 vpmins_f32 (float32x2_t a)
9945 float32_t result;
9946 __asm__ ("fminp %s0,%1.2s"
9947 : "=w"(result)
9948 : "w"(a)
9949 : /* No clobbers */);
9950 return result;
/* Saturating doubling multiply-high by scalar (vqdmulh_n_*): each lane of A
   is multiplied by B (broadcast via the element form %2.h[0]/%2.s[0]), the
   product doubled, and the high half returned with saturation.  The 16-bit
   variants constrain B with "x" because the by-element form of 16-bit
   multiplies can only index registers v0-v15; the 32-bit variants may use
   any FP/SIMD register ("w").  */
9953 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9954 vqdmulh_n_s16 (int16x4_t a, int16_t b)
9956 int16x4_t result;
9957 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
9958 : "=w"(result)
9959 : "w"(a), "x"(b)
9960 : /* No clobbers */);
9961 return result;
9964 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9965 vqdmulh_n_s32 (int32x2_t a, int32_t b)
9967 int32x2_t result;
9968 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
9969 : "=w"(result)
9970 : "w"(a), "w"(b)
9971 : /* No clobbers */);
9972 return result;
9975 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9976 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
9978 int16x8_t result;
9979 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
9980 : "=w"(result)
9981 : "w"(a), "x"(b)
9982 : /* No clobbers */);
9983 return result;
9986 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9987 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
9989 int32x4_t result;
9990 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
9991 : "=w"(result)
9992 : "w"(a), "w"(b)
9993 : /* No clobbers */);
9994 return result;
/* Saturating narrow to the high half (vqmovn_high_*, vqmovun_high_*):
   the result's low half is A (the previously-narrowed low part), and
   sqxtn2/uqxtn2/sqxtun2 overwrite only the high half with the saturated
   narrowing of B.  The low half is seeded with vcombine (A, zeros) and the
   asm output is a read-write operand ("+w") so the compiler keeps the low
   lanes live across the asm.  The *un* forms narrow signed input to
   unsigned with saturation.  */
9997 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9998 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
10000 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10001 __asm__ ("sqxtn2 %0.16b, %1.8h"
10002 : "+w"(result)
10003 : "w"(b)
10004 : /* No clobbers */);
10005 return result;
10008 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10009 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
10011 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10012 __asm__ ("sqxtn2 %0.8h, %1.4s"
10013 : "+w"(result)
10014 : "w"(b)
10015 : /* No clobbers */);
10016 return result;
10019 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10020 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
10022 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10023 __asm__ ("sqxtn2 %0.4s, %1.2d"
10024 : "+w"(result)
10025 : "w"(b)
10026 : /* No clobbers */);
10027 return result;
10030 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10031 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
10033 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10034 __asm__ ("uqxtn2 %0.16b, %1.8h"
10035 : "+w"(result)
10036 : "w"(b)
10037 : /* No clobbers */);
10038 return result;
10041 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10042 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
10044 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10045 __asm__ ("uqxtn2 %0.8h, %1.4s"
10046 : "+w"(result)
10047 : "w"(b)
10048 : /* No clobbers */);
10049 return result;
10052 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10053 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
10055 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10056 __asm__ ("uqxtn2 %0.4s, %1.2d"
10057 : "+w"(result)
10058 : "w"(b)
10059 : /* No clobbers */);
10060 return result;
/* Signed-to-unsigned saturating narrow, high half.  */
10063 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10064 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
10066 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10067 __asm__ ("sqxtun2 %0.16b, %1.8h"
10068 : "+w"(result)
10069 : "w"(b)
10070 : /* No clobbers */);
10071 return result;
10074 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10075 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
10077 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10078 __asm__ ("sqxtun2 %0.8h, %1.4s"
10079 : "+w"(result)
10080 : "w"(b)
10081 : /* No clobbers */);
10082 return result;
10085 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10086 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
10088 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10089 __asm__ ("sqxtun2 %0.4s, %1.2d"
10090 : "+w"(result)
10091 : "w"(b)
10092 : /* No clobbers */);
10093 return result;
/* Saturating rounding doubling multiply-high by scalar (vqrdmulh_n_*):
   like vqdmulh_n_* but the doubled product is rounded before taking the
   high half.  Same constraint split as above: "x" (v0-v15) for the 16-bit
   by-element operand, "w" for the 32-bit one.  */
10096 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10097 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
10099 int16x4_t result;
10100 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
10101 : "=w"(result)
10102 : "w"(a), "x"(b)
10103 : /* No clobbers */);
10104 return result;
10107 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10108 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
10110 int32x2_t result;
10111 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
10112 : "=w"(result)
10113 : "w"(a), "w"(b)
10114 : /* No clobbers */);
10115 return result;
10118 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10119 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
10121 int16x8_t result;
10122 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
10123 : "=w"(result)
10124 : "w"(a), "x"(b)
10125 : /* No clobbers */);
10126 return result;
10129 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10130 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
10132 int32x4_t result;
10133 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
10134 : "=w"(result)
10135 : "w"(a), "w"(b)
10136 : /* No clobbers */);
10137 return result;
/* Saturating shift-right-narrow to the high half, with immediate shift
   amount.  These must be macros (GNU statement expressions) rather than
   inline functions because the shift count C is baked into the instruction
   via the "i" constraint and so must be a compile-time constant.  As with
   the vqmovn_high_* functions above, the destination is seeded with
   vcombine (A, zeros) and tied read-write ("+w") so only the high half is
   replaced.  Mnemonic key: q = saturating, r = rounding, un = signed input
   narrowed to unsigned output.  */
10140 #define vqrshrn_high_n_s16(a, b, c)                                     \
10141   __extension__                                                         \
10142     ({                                                                  \
10143        int16x8_t b_ = (b);                                              \
10144        int8x8_t a_ = (a);                                               \
10145        int8x16_t result = vcombine_s8                                   \
10146                             (a_, vcreate_s8                             \
10147                                    (__AARCH64_UINT64_C (0x0)));         \
10148        __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
10149                 : "+w"(result)                                          \
10150                 : "w"(b_), "i"(c)                                       \
10151                 : /* No clobbers */);                                   \
10152        result;                                                          \
10155 #define vqrshrn_high_n_s32(a, b, c)                                     \
10156   __extension__                                                         \
10157     ({                                                                  \
10158        int32x4_t b_ = (b);                                              \
10159        int16x4_t a_ = (a);                                              \
10160        int16x8_t result = vcombine_s16                                  \
10161                             (a_, vcreate_s16                            \
10162                                    (__AARCH64_UINT64_C (0x0)));         \
10163        __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
10164                 : "+w"(result)                                          \
10165                 : "w"(b_), "i"(c)                                       \
10166                 : /* No clobbers */);                                   \
10167        result;                                                          \
10170 #define vqrshrn_high_n_s64(a, b, c)                                     \
10171   __extension__                                                         \
10172     ({                                                                  \
10173        int64x2_t b_ = (b);                                              \
10174        int32x2_t a_ = (a);                                              \
10175        int32x4_t result = vcombine_s32                                  \
10176                             (a_, vcreate_s32                            \
10177                                    (__AARCH64_UINT64_C (0x0)));         \
10178        __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
10179                 : "+w"(result)                                          \
10180                 : "w"(b_), "i"(c)                                       \
10181                 : /* No clobbers */);                                   \
10182        result;                                                          \
10185 #define vqrshrn_high_n_u16(a, b, c)                                     \
10186   __extension__                                                         \
10187     ({                                                                  \
10188        uint16x8_t b_ = (b);                                             \
10189        uint8x8_t a_ = (a);                                              \
10190        uint8x16_t result = vcombine_u8                                  \
10191                              (a_, vcreate_u8                            \
10192                                     (__AARCH64_UINT64_C (0x0)));        \
10193        __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
10194                 : "+w"(result)                                          \
10195                 : "w"(b_), "i"(c)                                       \
10196                 : /* No clobbers */);                                   \
10197        result;                                                          \
10200 #define vqrshrn_high_n_u32(a, b, c)                                     \
10201   __extension__                                                         \
10202     ({                                                                  \
10203        uint32x4_t b_ = (b);                                             \
10204        uint16x4_t a_ = (a);                                             \
10205        uint16x8_t result = vcombine_u16                                 \
10206                              (a_, vcreate_u16                           \
10207                                     (__AARCH64_UINT64_C (0x0)));        \
10208        __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
10209                 : "+w"(result)                                          \
10210                 : "w"(b_), "i"(c)                                       \
10211                 : /* No clobbers */);                                   \
10212        result;                                                          \
10215 #define vqrshrn_high_n_u64(a, b, c)                                     \
10216   __extension__                                                         \
10217     ({                                                                  \
10218        uint64x2_t b_ = (b);                                             \
10219        uint32x2_t a_ = (a);                                             \
10220        uint32x4_t result = vcombine_u32                                 \
10221                              (a_, vcreate_u32                           \
10222                                     (__AARCH64_UINT64_C (0x0)));        \
10223        __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
10224                 : "+w"(result)                                          \
10225                 : "w"(b_), "i"(c)                                       \
10226                 : /* No clobbers */);                                   \
10227        result;                                                          \
10230 #define vqrshrun_high_n_s16(a, b, c)                                    \
10231   __extension__                                                         \
10232     ({                                                                  \
10233        int16x8_t b_ = (b);                                              \
10234        uint8x8_t a_ = (a);                                              \
10235        uint8x16_t result = vcombine_u8                                  \
10236                              (a_, vcreate_u8                            \
10237                                     (__AARCH64_UINT64_C (0x0)));        \
10238        __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
10239                 : "+w"(result)                                          \
10240                 : "w"(b_), "i"(c)                                       \
10241                 : /* No clobbers */);                                   \
10242        result;                                                          \
10245 #define vqrshrun_high_n_s32(a, b, c)                                    \
10246   __extension__                                                         \
10247     ({                                                                  \
10248        int32x4_t b_ = (b);                                              \
10249        uint16x4_t a_ = (a);                                             \
10250        uint16x8_t result = vcombine_u16                                 \
10251                              (a_, vcreate_u16                           \
10252                                     (__AARCH64_UINT64_C (0x0)));        \
10253        __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
10254                 : "+w"(result)                                          \
10255                 : "w"(b_), "i"(c)                                       \
10256                 : /* No clobbers */);                                   \
10257        result;                                                          \
10260 #define vqrshrun_high_n_s64(a, b, c)                                    \
10261   __extension__                                                         \
10262     ({                                                                  \
10263        int64x2_t b_ = (b);                                              \
10264        uint32x2_t a_ = (a);                                             \
10265        uint32x4_t result = vcombine_u32                                 \
10266                              (a_, vcreate_u32                           \
10267                                     (__AARCH64_UINT64_C (0x0)));        \
10268        __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
10269                 : "+w"(result)                                          \
10270                 : "w"(b_), "i"(c)                                       \
10271                 : /* No clobbers */);                                   \
10272        result;                                                          \
10275 #define vqshrn_high_n_s16(a, b, c)                                      \
10276   __extension__                                                         \
10277     ({                                                                  \
10278        int16x8_t b_ = (b);                                              \
10279        int8x8_t a_ = (a);                                               \
10280        int8x16_t result = vcombine_s8                                   \
10281                             (a_, vcreate_s8                             \
10282                                    (__AARCH64_UINT64_C (0x0)));         \
10283        __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
10284                 : "+w"(result)                                          \
10285                 : "w"(b_), "i"(c)                                       \
10286                 : /* No clobbers */);                                   \
10287        result;                                                          \
10290 #define vqshrn_high_n_s32(a, b, c)                                      \
10291   __extension__                                                         \
10292     ({                                                                  \
10293        int32x4_t b_ = (b);                                              \
10294        int16x4_t a_ = (a);                                              \
10295        int16x8_t result = vcombine_s16                                  \
10296                             (a_, vcreate_s16                            \
10297                                    (__AARCH64_UINT64_C (0x0)));         \
10298        __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
10299                 : "+w"(result)                                          \
10300                 : "w"(b_), "i"(c)                                       \
10301                 : /* No clobbers */);                                   \
10302        result;                                                          \
10305 #define vqshrn_high_n_s64(a, b, c)                                      \
10306   __extension__                                                         \
10307     ({                                                                  \
10308        int64x2_t b_ = (b);                                              \
10309        int32x2_t a_ = (a);                                              \
10310        int32x4_t result = vcombine_s32                                  \
10311                             (a_, vcreate_s32                            \
10312                                    (__AARCH64_UINT64_C (0x0)));         \
10313        __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
10314                 : "+w"(result)                                          \
10315                 : "w"(b_), "i"(c)                                       \
10316                 : /* No clobbers */);                                   \
10317        result;                                                          \
10320 #define vqshrn_high_n_u16(a, b, c)                                      \
10321   __extension__                                                         \
10322     ({                                                                  \
10323        uint16x8_t b_ = (b);                                             \
10324        uint8x8_t a_ = (a);                                              \
10325        uint8x16_t result = vcombine_u8                                  \
10326                              (a_, vcreate_u8                            \
10327                                     (__AARCH64_UINT64_C (0x0)));        \
10328        __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
10329                 : "+w"(result)                                          \
10330                 : "w"(b_), "i"(c)                                       \
10331                 : /* No clobbers */);                                   \
10332        result;                                                          \
10335 #define vqshrn_high_n_u32(a, b, c)                                      \
10336   __extension__                                                         \
10337     ({                                                                  \
10338        uint32x4_t b_ = (b);                                             \
10339        uint16x4_t a_ = (a);                                             \
10340        uint16x8_t result = vcombine_u16                                 \
10341                              (a_, vcreate_u16                           \
10342                                     (__AARCH64_UINT64_C (0x0)));        \
10343        __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
10344                 : "+w"(result)                                          \
10345                 : "w"(b_), "i"(c)                                       \
10346                 : /* No clobbers */);                                   \
10347        result;                                                          \
10350 #define vqshrn_high_n_u64(a, b, c)                                      \
10351   __extension__                                                         \
10352     ({                                                                  \
10353        uint64x2_t b_ = (b);                                             \
10354        uint32x2_t a_ = (a);                                             \
10355        uint32x4_t result = vcombine_u32                                 \
10356                              (a_, vcreate_u32                           \
10357                                     (__AARCH64_UINT64_C (0x0)));        \
10358        __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
10359                 : "+w"(result)                                          \
10360                 : "w"(b_), "i"(c)                                       \
10361                 : /* No clobbers */);                                   \
10362        result;                                                          \
10365 #define vqshrun_high_n_s16(a, b, c)                                     \
10366   __extension__                                                         \
10367     ({                                                                  \
10368        int16x8_t b_ = (b);                                              \
10369        uint8x8_t a_ = (a);                                              \
10370        uint8x16_t result = vcombine_u8                                  \
10371                              (a_, vcreate_u8                            \
10372                                     (__AARCH64_UINT64_C (0x0)));        \
10373        __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
10374                 : "+w"(result)                                          \
10375                 : "w"(b_), "i"(c)                                       \
10376                 : /* No clobbers */);                                   \
10377        result;                                                          \
10380 #define vqshrun_high_n_s32(a, b, c)                                     \
10381   __extension__                                                         \
10382     ({                                                                  \
10383        int32x4_t b_ = (b);                                              \
10384        uint16x4_t a_ = (a);                                             \
10385        uint16x8_t result = vcombine_u16                                 \
10386                              (a_, vcreate_u16                           \
10387                                     (__AARCH64_UINT64_C (0x0)));        \
10388        __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
10389                 : "+w"(result)                                          \
10390                 : "w"(b_), "i"(c)                                       \
10391                 : /* No clobbers */);                                   \
10392        result;                                                          \
10395 #define vqshrun_high_n_s64(a, b, c)                                     \
10396   __extension__                                                         \
10397     ({                                                                  \
10398        int64x2_t b_ = (b);                                              \
10399        uint32x2_t a_ = (a);                                             \
10400        uint32x4_t result = vcombine_u32                                 \
10401                              (a_, vcreate_u32                           \
10402                                     (__AARCH64_UINT64_C (0x0)));        \
10403        __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
10404                 : "+w"(result)                                          \
10405                 : "w"(b_), "i"(c)                                       \
10406                 : /* No clobbers */);                                   \
10407        result;                                                          \
/* vrbit*: reverse the bit order within each byte lane (rbit).
   vrecpe*_u32: unsigned reciprocal estimate (urecpe) on 32-bit lanes.  */
10410 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10411 vrbit_s8 (int8x8_t a)
10413 int8x8_t result;
10414 __asm__ ("rbit %0.8b,%1.8b"
10415 : "=w"(result)
10416 : "w"(a)
10417 : /* No clobbers */);
10418 return result;
10421 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10422 vrbit_u8 (uint8x8_t a)
10424 uint8x8_t result;
10425 __asm__ ("rbit %0.8b,%1.8b"
10426 : "=w"(result)
10427 : "w"(a)
10428 : /* No clobbers */);
10429 return result;
10432 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10433 vrbitq_s8 (int8x16_t a)
10435 int8x16_t result;
10436 __asm__ ("rbit %0.16b,%1.16b"
10437 : "=w"(result)
10438 : "w"(a)
10439 : /* No clobbers */);
10440 return result;
10443 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10444 vrbitq_u8 (uint8x16_t a)
10446 uint8x16_t result;
10447 __asm__ ("rbit %0.16b,%1.16b"
10448 : "=w"(result)
10449 : "w"(a)
10450 : /* No clobbers */);
10451 return result;
10454 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10455 vrecpe_u32 (uint32x2_t a)
10457 uint32x2_t result;
10458 __asm__ ("urecpe %0.2s,%1.2s"
10459 : "=w"(result)
10460 : "w"(a)
10461 : /* No clobbers */);
10462 return result;
10465 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10466 vrecpeq_u32 (uint32x4_t a)
10468 uint32x4_t result;
10469 __asm__ ("urecpe %0.4s,%1.4s"
10470 : "=w"(result)
10471 : "w"(a)
10472 : /* No clobbers */);
10473 return result;
/* Rounding shift-right-narrow (rshrn/rshrn2), non-saturating.
   vrshrn_high_n_*: narrow B into the high half of a vector whose low half
   is A (same vcombine-seed + "+w" tie pattern as the saturating forms
   above).  vrshrn_n_*: plain narrowing into a 64-bit result.  Macros, not
   functions, because the shift count is an "i" (immediate) operand.  */
10476 #define vrshrn_high_n_s16(a, b, c)                                      \
10477   __extension__                                                         \
10478     ({                                                                  \
10479        int16x8_t b_ = (b);                                              \
10480        int8x8_t a_ = (a);                                               \
10481        int8x16_t result = vcombine_s8                                   \
10482                             (a_, vcreate_s8                             \
10483                                    (__AARCH64_UINT64_C (0x0)));         \
10484        __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
10485                 : "+w"(result)                                          \
10486                 : "w"(b_), "i"(c)                                       \
10487                 : /* No clobbers */);                                   \
10488        result;                                                          \
10491 #define vrshrn_high_n_s32(a, b, c)                                      \
10492   __extension__                                                         \
10493     ({                                                                  \
10494        int32x4_t b_ = (b);                                              \
10495        int16x4_t a_ = (a);                                              \
10496        int16x8_t result = vcombine_s16                                  \
10497                             (a_, vcreate_s16                            \
10498                                    (__AARCH64_UINT64_C (0x0)));         \
10499        __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
10500                 : "+w"(result)                                          \
10501                 : "w"(b_), "i"(c)                                       \
10502                 : /* No clobbers */);                                   \
10503        result;                                                          \
10506 #define vrshrn_high_n_s64(a, b, c)                                      \
10507   __extension__                                                         \
10508     ({                                                                  \
10509        int64x2_t b_ = (b);                                              \
10510        int32x2_t a_ = (a);                                              \
10511        int32x4_t result = vcombine_s32                                  \
10512                             (a_, vcreate_s32                            \
10513                                    (__AARCH64_UINT64_C (0x0)));         \
10514        __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
10515                 : "+w"(result)                                          \
10516                 : "w"(b_), "i"(c)                                       \
10517                 : /* No clobbers */);                                   \
10518        result;                                                          \
10521 #define vrshrn_high_n_u16(a, b, c)                                      \
10522   __extension__                                                         \
10523     ({                                                                  \
10524        uint16x8_t b_ = (b);                                             \
10525        uint8x8_t a_ = (a);                                              \
10526        uint8x16_t result = vcombine_u8                                  \
10527                              (a_, vcreate_u8                            \
10528                                     (__AARCH64_UINT64_C (0x0)));        \
10529        __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
10530                 : "+w"(result)                                          \
10531                 : "w"(b_), "i"(c)                                       \
10532                 : /* No clobbers */);                                   \
10533        result;                                                          \
10536 #define vrshrn_high_n_u32(a, b, c)                                      \
10537   __extension__                                                         \
10538     ({                                                                  \
10539        uint32x4_t b_ = (b);                                             \
10540        uint16x4_t a_ = (a);                                             \
10541        uint16x8_t result = vcombine_u16                                 \
10542                              (a_, vcreate_u16                           \
10543                                     (__AARCH64_UINT64_C (0x0)));        \
10544        __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
10545                 : "+w"(result)                                          \
10546                 : "w"(b_), "i"(c)                                       \
10547                 : /* No clobbers */);                                   \
10548        result;                                                          \
10551 #define vrshrn_high_n_u64(a, b, c)                                      \
10552   __extension__                                                         \
10553     ({                                                                  \
10554        uint64x2_t b_ = (b);                                             \
10555        uint32x2_t a_ = (a);                                             \
10556        uint32x4_t result = vcombine_u32                                 \
10557                              (a_, vcreate_u32                           \
10558                                     (__AARCH64_UINT64_C (0x0)));        \
10559        __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
10560                 : "+w"(result)                                          \
10561                 : "w"(b_), "i"(c)                                       \
10562                 : /* No clobbers */);                                   \
10563        result;                                                          \
10566 #define vrshrn_n_s16(a, b)                                              \
10567   __extension__                                                         \
10568     ({                                                                  \
10569        int16x8_t a_ = (a);                                              \
10570        int8x8_t result;                                                 \
10571        __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
10572                 : "=w"(result)                                          \
10573                 : "w"(a_), "i"(b)                                       \
10574                 : /* No clobbers */);                                   \
10575        result;                                                          \
10578 #define vrshrn_n_s32(a, b)                                              \
10579   __extension__                                                         \
10580     ({                                                                  \
10581        int32x4_t a_ = (a);                                              \
10582        int16x4_t result;                                                \
10583        __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
10584                 : "=w"(result)                                          \
10585                 : "w"(a_), "i"(b)                                       \
10586                 : /* No clobbers */);                                   \
10587        result;                                                          \
10590 #define vrshrn_n_s64(a, b)                                              \
10591   __extension__                                                         \
10592     ({                                                                  \
10593        int64x2_t a_ = (a);                                              \
10594        int32x2_t result;                                                \
10595        __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
10596                 : "=w"(result)                                          \
10597                 : "w"(a_), "i"(b)                                       \
10598                 : /* No clobbers */);                                   \
10599        result;                                                          \
10602 #define vrshrn_n_u16(a, b)                                              \
10603   __extension__                                                         \
10604     ({                                                                  \
10605        uint16x8_t a_ = (a);                                             \
10606        uint8x8_t result;                                                \
10607        __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
10608                 : "=w"(result)                                          \
10609                 : "w"(a_), "i"(b)                                       \
10610                 : /* No clobbers */);                                   \
10611        result;                                                          \
10614 #define vrshrn_n_u32(a, b)                                              \
10615   __extension__                                                         \
10616     ({                                                                  \
10617        uint32x4_t a_ = (a);                                             \
10618        uint16x4_t result;                                               \
10619        __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
10620                 : "=w"(result)                                          \
10621                 : "w"(a_), "i"(b)                                       \
10622                 : /* No clobbers */);                                   \
10623        result;                                                          \
10626 #define vrshrn_n_u64(a, b)                                              \
10627   __extension__                                                         \
10628     ({                                                                  \
10629        uint64x2_t a_ = (a);                                             \
10630        uint32x2_t result;                                               \
10631        __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
10632                 : "=w"(result)                                          \
10633                 : "w"(a_), "i"(b)                                       \
10634                 : /* No clobbers */);                                   \
10635        result;                                                          \
/* Reciprocal square-root support: vrsqrte* is the estimate (frsqrte for FP,
   ursqrte for the fixed-point u32 form); vrsqrts* is the Newton-Raphson
   step instruction (frsqrts) used to refine that estimate.  Scalar forms
   use the %s/%d operand modifiers to select a single S/D register.  */
10638 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10639 vrsqrte_f32 (float32x2_t a)
10641 float32x2_t result;
10642 __asm__ ("frsqrte %0.2s,%1.2s"
10643 : "=w"(result)
10644 : "w"(a)
10645 : /* No clobbers */);
10646 return result;
10649 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
10650 vrsqrte_f64 (float64x1_t a)
10652 float64x1_t result;
10653 __asm__ ("frsqrte %d0,%d1"
10654 : "=w"(result)
10655 : "w"(a)
10656 : /* No clobbers */);
10657 return result;
10660 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10661 vrsqrte_u32 (uint32x2_t a)
10663 uint32x2_t result;
10664 __asm__ ("ursqrte %0.2s,%1.2s"
10665 : "=w"(result)
10666 : "w"(a)
10667 : /* No clobbers */);
10668 return result;
10671 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10672 vrsqrted_f64 (float64_t a)
10674 float64_t result;
10675 __asm__ ("frsqrte %d0,%d1"
10676 : "=w"(result)
10677 : "w"(a)
10678 : /* No clobbers */);
10679 return result;
10682 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10683 vrsqrteq_f32 (float32x4_t a)
10685 float32x4_t result;
10686 __asm__ ("frsqrte %0.4s,%1.4s"
10687 : "=w"(result)
10688 : "w"(a)
10689 : /* No clobbers */);
10690 return result;
10693 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10694 vrsqrteq_f64 (float64x2_t a)
10696 float64x2_t result;
10697 __asm__ ("frsqrte %0.2d,%1.2d"
10698 : "=w"(result)
10699 : "w"(a)
10700 : /* No clobbers */);
10701 return result;
10704 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10705 vrsqrteq_u32 (uint32x4_t a)
10707 uint32x4_t result;
10708 __asm__ ("ursqrte %0.4s,%1.4s"
10709 : "=w"(result)
10710 : "w"(a)
10711 : /* No clobbers */);
10712 return result;
10715 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10716 vrsqrtes_f32 (float32_t a)
10718 float32_t result;
10719 __asm__ ("frsqrte %s0,%s1"
10720 : "=w"(result)
10721 : "w"(a)
10722 : /* No clobbers */);
10723 return result;
/* frsqrts: reciprocal square-root refinement step.  */
10726 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10727 vrsqrts_f32 (float32x2_t a, float32x2_t b)
10729 float32x2_t result;
10730 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
10731 : "=w"(result)
10732 : "w"(a), "w"(b)
10733 : /* No clobbers */);
10734 return result;
10737 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10738 vrsqrtsd_f64 (float64_t a, float64_t b)
10740 float64_t result;
10741 __asm__ ("frsqrts %d0,%d1,%d2"
10742 : "=w"(result)
10743 : "w"(a), "w"(b)
10744 : /* No clobbers */);
10745 return result;
10748 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10749 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
10751 float32x4_t result;
10752 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
10753 : "=w"(result)
10754 : "w"(a), "w"(b)
10755 : /* No clobbers */);
10756 return result;
10759 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10760 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
10762 float64x2_t result;
10763 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
10764 : "=w"(result)
10765 : "w"(a), "w"(b)
10766 : /* No clobbers */);
10767 return result;
10770 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10771 vrsqrtss_f32 (float32_t a, float32_t b)
10773 float32_t result;
10774 __asm__ ("frsqrts %s0,%s1,%s2"
10775 : "=w"(result)
10776 : "w"(a), "w"(b)
10777 : /* No clobbers */);
10778 return result;
/* Rounding subtract-halve-narrow (rsubhn/rsubhn2): compute B - C, take the
   rounded high half of each difference, and narrow.  The *_high_* variants
   deposit the narrowed result in the high half of a vector whose low half
   is A (vcombine seed plus "+w" read-write tie, as in the narrowing
   intrinsics above); the plain variants return a 64-bit vector.  */
10781 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10782 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
10784 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10785 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
10786 : "+w"(result)
10787 : "w"(b), "w"(c)
10788 : /* No clobbers */);
10789 return result;
10792 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10793 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
10795 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10796 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
10797 : "+w"(result)
10798 : "w"(b), "w"(c)
10799 : /* No clobbers */);
10800 return result;
10803 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10804 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
10806 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10807 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
10808 : "+w"(result)
10809 : "w"(b), "w"(c)
10810 : /* No clobbers */);
10811 return result;
10814 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10815 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
10817 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10818 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
10819 : "+w"(result)
10820 : "w"(b), "w"(c)
10821 : /* No clobbers */);
10822 return result;
10825 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10826 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
10828 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10829 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
10830 : "+w"(result)
10831 : "w"(b), "w"(c)
10832 : /* No clobbers */);
10833 return result;
10836 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10837 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
10839 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10840 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
10841 : "+w"(result)
10842 : "w"(b), "w"(c)
10843 : /* No clobbers */);
10844 return result;
/* Plain (non-high) rounding subtract-halve-narrow.  */
10847 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10848 vrsubhn_s16 (int16x8_t a, int16x8_t b)
10850 int8x8_t result;
10851 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
10852 : "=w"(result)
10853 : "w"(a), "w"(b)
10854 : /* No clobbers */);
10855 return result;
10858 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10859 vrsubhn_s32 (int32x4_t a, int32x4_t b)
10861 int16x4_t result;
10862 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
10863 : "=w"(result)
10864 : "w"(a), "w"(b)
10865 : /* No clobbers */);
10866 return result;
10869 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10870 vrsubhn_s64 (int64x2_t a, int64x2_t b)
10872 int32x2_t result;
10873 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
10874 : "=w"(result)
10875 : "w"(a), "w"(b)
10876 : /* No clobbers */);
10877 return result;
10880 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10881 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
10883 uint8x8_t result;
10884 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
10885 : "=w"(result)
10886 : "w"(a), "w"(b)
10887 : /* No clobbers */);
10888 return result;
10891 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10892 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
10894 uint16x4_t result;
10895 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
10896 : "=w"(result)
10897 : "w"(a), "w"(b)
10898 : /* No clobbers */);
10899 return result;
10902 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10903 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
10905 uint32x2_t result;
10906 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
10907 : "=w"(result)
10908 : "w"(a), "w"(b)
10909 : /* No clobbers */);
10910 return result;
/* vset_lane_* (64-bit vectors): return B with lane C replaced by scalar A.
   Implemented as macros because the lane index is an "i" immediate baked
   into the ins instruction.  A is forced into a general register ("r") and
   inserted with ins; the "0" constraint ties B to the output so the other
   lanes are preserved.  %w1/%x1 select the 32-/64-bit general-register
   name for the source.  */
10913 #define vset_lane_f32(a, b, c)                                          \
10914   __extension__                                                         \
10915     ({                                                                  \
10916        float32x2_t b_ = (b);                                            \
10917        float32_t a_ = (a);                                              \
10918        float32x2_t result;                                              \
10919        __asm__ ("ins %0.s[%3], %w1"                                     \
10920                 : "=w"(result)                                          \
10921                 : "r"(a_), "0"(b_), "i"(c)                              \
10922                 : /* No clobbers */);                                   \
10923        result;                                                          \
10926 #define vset_lane_f64(a, b, c)                                          \
10927   __extension__                                                         \
10928     ({                                                                  \
10929        float64x1_t b_ = (b);                                            \
10930        float64_t a_ = (a);                                              \
10931        float64x1_t result;                                              \
10932        __asm__ ("ins %0.d[%3], %x1"                                     \
10933                 : "=w"(result)                                          \
10934                 : "r"(a_), "0"(b_), "i"(c)                              \
10935                 : /* No clobbers */);                                   \
10936        result;                                                          \
10939 #define vset_lane_p8(a, b, c)                                           \
10940   __extension__                                                         \
10941     ({                                                                  \
10942        poly8x8_t b_ = (b);                                              \
10943        poly8_t a_ = (a);                                                \
10944        poly8x8_t result;                                                \
10945        __asm__ ("ins %0.b[%3], %w1"                                     \
10946                 : "=w"(result)                                          \
10947                 : "r"(a_), "0"(b_), "i"(c)                              \
10948                 : /* No clobbers */);                                   \
10949        result;                                                          \
10952 #define vset_lane_p16(a, b, c)                                          \
10953   __extension__                                                         \
10954     ({                                                                  \
10955        poly16x4_t b_ = (b);                                             \
10956        poly16_t a_ = (a);                                               \
10957        poly16x4_t result;                                               \
10958        __asm__ ("ins %0.h[%3], %w1"                                     \
10959                 : "=w"(result)                                          \
10960                 : "r"(a_), "0"(b_), "i"(c)                              \
10961                 : /* No clobbers */);                                   \
10962        result;                                                          \
10965 #define vset_lane_s8(a, b, c)                                           \
10966   __extension__                                                         \
10967     ({                                                                  \
10968        int8x8_t b_ = (b);                                               \
10969        int8_t a_ = (a);                                                 \
10970        int8x8_t result;                                                 \
10971        __asm__ ("ins %0.b[%3], %w1"                                     \
10972                 : "=w"(result)                                          \
10973                 : "r"(a_), "0"(b_), "i"(c)                              \
10974                 : /* No clobbers */);                                   \
10975        result;                                                          \
10978 #define vset_lane_s16(a, b, c)                                          \
10979   __extension__                                                         \
10980     ({                                                                  \
10981        int16x4_t b_ = (b);                                              \
10982        int16_t a_ = (a);                                                \
10983        int16x4_t result;                                                \
10984        __asm__ ("ins %0.h[%3], %w1"                                     \
10985                 : "=w"(result)                                          \
10986                 : "r"(a_), "0"(b_), "i"(c)                              \
10987                 : /* No clobbers */);                                   \
10988        result;                                                          \
10991 #define vset_lane_s32(a, b, c)                                          \
10992   __extension__                                                         \
10993     ({                                                                  \
10994        int32x2_t b_ = (b);                                              \
10995        int32_t a_ = (a);                                                \
10996        int32x2_t result;                                                \
10997        __asm__ ("ins %0.s[%3], %w1"                                     \
10998                 : "=w"(result)                                          \
10999                 : "r"(a_), "0"(b_), "i"(c)                              \
11000                 : /* No clobbers */);                                   \
11001        result;                                                          \
11004 #define vset_lane_s64(a, b, c)                                          \
11005   __extension__                                                         \
11006     ({                                                                  \
11007        int64x1_t b_ = (b);                                              \
11008        int64_t a_ = (a);                                                \
11009        int64x1_t result;                                                \
11010        __asm__ ("ins %0.d[%3], %x1"                                     \
11011                 : "=w"(result)                                          \
11012                 : "r"(a_), "0"(b_), "i"(c)                              \
11013                 : /* No clobbers */);                                   \
11014        result;                                                          \
11017 #define vset_lane_u8(a, b, c)                                           \
11018   __extension__                                                         \
11019     ({                                                                  \
11020        uint8x8_t b_ = (b);                                              \
11021        uint8_t a_ = (a);                                                \
11022        uint8x8_t result;                                                \
11023        __asm__ ("ins %0.b[%3], %w1"                                     \
11024                 : "=w"(result)                                          \
11025                 : "r"(a_), "0"(b_), "i"(c)                              \
11026                 : /* No clobbers */);                                   \
11027        result;                                                          \
11030 #define vset_lane_u16(a, b, c)                                          \
11031   __extension__                                                         \
11032     ({                                                                  \
11033        uint16x4_t b_ = (b);                                             \
11034        uint16_t a_ = (a);                                               \
11035        uint16x4_t result;                                               \
11036        __asm__ ("ins %0.h[%3], %w1"                                     \
11037                 : "=w"(result)                                          \
11038                 : "r"(a_), "0"(b_), "i"(c)                              \
11039                 : /* No clobbers */);                                   \
11040        result;                                                          \
11043 #define vset_lane_u32(a, b, c)                                          \
11044   __extension__                                                         \
11045     ({                                                                  \
11046        uint32x2_t b_ = (b);                                             \
11047        uint32_t a_ = (a);                                               \
11048        uint32x2_t result;                                               \
11049        __asm__ ("ins %0.s[%3], %w1"                                     \
11050                 : "=w"(result)                                          \
11051                 : "r"(a_), "0"(b_), "i"(c)                              \
11052                 : /* No clobbers */);                                   \
11053        result;                                                          \
11056 #define vset_lane_u64(a, b, c)                                          \
11057   __extension__                                                         \
11058     ({                                                                  \
11059        uint64x1_t b_ = (b);                                             \
11060        uint64_t a_ = (a);                                               \
11061        uint64x1_t result;                                               \
11062        __asm__ ("ins %0.d[%3], %x1"                                     \
11063                 : "=w"(result)                                          \
11064                 : "r"(a_), "0"(b_), "i"(c)                              \
11065                 : /* No clobbers */);                                   \
11066        result;                                                          \
/* vsetq_lane_* (128-bit vectors): same ins-based lane replacement as the
   vset_lane_* macros above, operating on quad-word vector types.  The "0"
   tie keeps the untouched lanes of B; the lane index C must be a
   compile-time constant ("i").  */
11069 #define vsetq_lane_f32(a, b, c)                                         \
11070   __extension__                                                         \
11071     ({                                                                  \
11072        float32x4_t b_ = (b);                                            \
11073        float32_t a_ = (a);                                              \
11074        float32x4_t result;                                              \
11075        __asm__ ("ins %0.s[%3], %w1"                                     \
11076                 : "=w"(result)                                          \
11077                 : "r"(a_), "0"(b_), "i"(c)                              \
11078                 : /* No clobbers */);                                   \
11079        result;                                                          \
11082 #define vsetq_lane_f64(a, b, c)                                         \
11083   __extension__                                                         \
11084     ({                                                                  \
11085        float64x2_t b_ = (b);                                            \
11086        float64_t a_ = (a);                                              \
11087        float64x2_t result;                                              \
11088        __asm__ ("ins %0.d[%3], %x1"                                     \
11089                 : "=w"(result)                                          \
11090                 : "r"(a_), "0"(b_), "i"(c)                              \
11091                 : /* No clobbers */);                                   \
11092        result;                                                          \
11095 #define vsetq_lane_p8(a, b, c)                                          \
11096   __extension__                                                         \
11097     ({                                                                  \
11098        poly8x16_t b_ = (b);                                             \
11099        poly8_t a_ = (a);                                                \
11100        poly8x16_t result;                                               \
11101        __asm__ ("ins %0.b[%3], %w1"                                     \
11102                 : "=w"(result)                                          \
11103                 : "r"(a_), "0"(b_), "i"(c)                              \
11104                 : /* No clobbers */);                                   \
11105        result;                                                          \
11108 #define vsetq_lane_p16(a, b, c)                                         \
11109   __extension__                                                         \
11110     ({                                                                  \
11111        poly16x8_t b_ = (b);                                             \
11112        poly16_t a_ = (a);                                               \
11113        poly16x8_t result;                                               \
11114        __asm__ ("ins %0.h[%3], %w1"                                     \
11115                 : "=w"(result)                                          \
11116                 : "r"(a_), "0"(b_), "i"(c)                              \
11117                 : /* No clobbers */);                                   \
11118        result;                                                          \
11121 #define vsetq_lane_s8(a, b, c)                                          \
11122   __extension__                                                         \
11123     ({                                                                  \
11124        int8x16_t b_ = (b);                                              \
11125        int8_t a_ = (a);                                                 \
11126        int8x16_t result;                                                \
11127        __asm__ ("ins %0.b[%3], %w1"                                     \
11128                 : "=w"(result)                                          \
11129                 : "r"(a_), "0"(b_), "i"(c)                              \
11130                 : /* No clobbers */);                                   \
11131        result;                                                          \
11134 #define vsetq_lane_s16(a, b, c)                                         \
11135   __extension__                                                         \
11136     ({                                                                  \
11137        int16x8_t b_ = (b);                                              \
11138        int16_t a_ = (a);                                                \
11139        int16x8_t result;                                                \
11140        __asm__ ("ins %0.h[%3], %w1"                                     \
11141                 : "=w"(result)                                          \
11142                 : "r"(a_), "0"(b_), "i"(c)                              \
11143                 : /* No clobbers */);                                   \
11144        result;                                                          \
11147 #define vsetq_lane_s32(a, b, c)                                         \
11148   __extension__                                                         \
11149     ({                                                                  \
11150        int32x4_t b_ = (b);                                              \
11151        int32_t a_ = (a);                                                \
11152        int32x4_t result;                                                \
11153        __asm__ ("ins %0.s[%3], %w1"                                     \
11154                 : "=w"(result)                                          \
11155                 : "r"(a_), "0"(b_), "i"(c)                              \
11156                 : /* No clobbers */);                                   \
11157        result;                                                          \
11160 #define vsetq_lane_s64(a, b, c)                                         \
11161   __extension__                                                         \
11162     ({                                                                  \
11163        int64x2_t b_ = (b);                                              \
11164        int64_t a_ = (a);                                                \
11165        int64x2_t result;                                                \
11166        __asm__ ("ins %0.d[%3], %x1"                                     \
11167                 : "=w"(result)                                          \
11168                 : "r"(a_), "0"(b_), "i"(c)                              \
11169                 : /* No clobbers */);                                   \
11170        result;                                                          \
11173 #define vsetq_lane_u8(a, b, c)                                          \
11174   __extension__                                                         \
11175     ({                                                                  \
11176        uint8x16_t b_ = (b);                                             \
11177        uint8_t a_ = (a);                                                \
11178        uint8x16_t result;                                               \
11179        __asm__ ("ins %0.b[%3], %w1"                                     \
11180                 : "=w"(result)                                          \
11181                 : "r"(a_), "0"(b_), "i"(c)                              \
11182                 : /* No clobbers */);                                   \
11183        result;                                                          \
11186 #define vsetq_lane_u16(a, b, c)                                         \
11187   __extension__                                                         \
11188     ({                                                                  \
11189        uint16x8_t b_ = (b);                                             \
11190        uint16_t a_ = (a);                                               \
11191        uint16x8_t result;                                               \
11192        __asm__ ("ins %0.h[%3], %w1"                                     \
11193                 : "=w"(result)                                          \
11194                 : "r"(a_), "0"(b_), "i"(c)                              \
11195                 : /* No clobbers */);                                   \
11196        result;                                                          \
11199 #define vsetq_lane_u32(a, b, c) \
11200 __extension__ \
11201 ({ \
11202 uint32x4_t b_ = (b); \
11203 uint32_t a_ = (a); \
11204 uint32x4_t result; \
11205 __asm__ ("ins %0.s[%3], %w1" \
11206 : "=w"(result) \
11207 : "r"(a_), "0"(b_), "i"(c) \
11208 : /* No clobbers */); \
11209 result; \
11212 #define vsetq_lane_u64(a, b, c) \
11213 __extension__ \
11214 ({ \
11215 uint64x2_t b_ = (b); \
11216 uint64_t a_ = (a); \
11217 uint64x2_t result; \
11218 __asm__ ("ins %0.d[%3], %x1" \
11219 : "=w"(result) \
11220 : "r"(a_), "0"(b_), "i"(c) \
11221 : /* No clobbers */); \
11222 result; \
11225 #define vshrn_high_n_s16(a, b, c) \
11226 __extension__ \
11227 ({ \
11228 int16x8_t b_ = (b); \
11229 int8x8_t a_ = (a); \
11230 int8x16_t result = vcombine_s8 \
11231 (a_, vcreate_s8 \
11232 (__AARCH64_UINT64_C (0x0))); \
11233 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11234 : "+w"(result) \
11235 : "w"(b_), "i"(c) \
11236 : /* No clobbers */); \
11237 result; \
11240 #define vshrn_high_n_s32(a, b, c) \
11241 __extension__ \
11242 ({ \
11243 int32x4_t b_ = (b); \
11244 int16x4_t a_ = (a); \
11245 int16x8_t result = vcombine_s16 \
11246 (a_, vcreate_s16 \
11247 (__AARCH64_UINT64_C (0x0))); \
11248 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11249 : "+w"(result) \
11250 : "w"(b_), "i"(c) \
11251 : /* No clobbers */); \
11252 result; \
11255 #define vshrn_high_n_s64(a, b, c) \
11256 __extension__ \
11257 ({ \
11258 int64x2_t b_ = (b); \
11259 int32x2_t a_ = (a); \
11260 int32x4_t result = vcombine_s32 \
11261 (a_, vcreate_s32 \
11262 (__AARCH64_UINT64_C (0x0))); \
11263 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11264 : "+w"(result) \
11265 : "w"(b_), "i"(c) \
11266 : /* No clobbers */); \
11267 result; \
11270 #define vshrn_high_n_u16(a, b, c) \
11271 __extension__ \
11272 ({ \
11273 uint16x8_t b_ = (b); \
11274 uint8x8_t a_ = (a); \
11275 uint8x16_t result = vcombine_u8 \
11276 (a_, vcreate_u8 \
11277 (__AARCH64_UINT64_C (0x0))); \
11278 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11279 : "+w"(result) \
11280 : "w"(b_), "i"(c) \
11281 : /* No clobbers */); \
11282 result; \
11285 #define vshrn_high_n_u32(a, b, c) \
11286 __extension__ \
11287 ({ \
11288 uint32x4_t b_ = (b); \
11289 uint16x4_t a_ = (a); \
11290 uint16x8_t result = vcombine_u16 \
11291 (a_, vcreate_u16 \
11292 (__AARCH64_UINT64_C (0x0))); \
11293 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11294 : "+w"(result) \
11295 : "w"(b_), "i"(c) \
11296 : /* No clobbers */); \
11297 result; \
11300 #define vshrn_high_n_u64(a, b, c) \
11301 __extension__ \
11302 ({ \
11303 uint64x2_t b_ = (b); \
11304 uint32x2_t a_ = (a); \
11305 uint32x4_t result = vcombine_u32 \
11306 (a_, vcreate_u32 \
11307 (__AARCH64_UINT64_C (0x0))); \
11308 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11309 : "+w"(result) \
11310 : "w"(b_), "i"(c) \
11311 : /* No clobbers */); \
11312 result; \
11315 #define vshrn_n_s16(a, b) \
11316 __extension__ \
11317 ({ \
11318 int16x8_t a_ = (a); \
11319 int8x8_t result; \
11320 __asm__ ("shrn %0.8b,%1.8h,%2" \
11321 : "=w"(result) \
11322 : "w"(a_), "i"(b) \
11323 : /* No clobbers */); \
11324 result; \
11327 #define vshrn_n_s32(a, b) \
11328 __extension__ \
11329 ({ \
11330 int32x4_t a_ = (a); \
11331 int16x4_t result; \
11332 __asm__ ("shrn %0.4h,%1.4s,%2" \
11333 : "=w"(result) \
11334 : "w"(a_), "i"(b) \
11335 : /* No clobbers */); \
11336 result; \
11339 #define vshrn_n_s64(a, b) \
11340 __extension__ \
11341 ({ \
11342 int64x2_t a_ = (a); \
11343 int32x2_t result; \
11344 __asm__ ("shrn %0.2s,%1.2d,%2" \
11345 : "=w"(result) \
11346 : "w"(a_), "i"(b) \
11347 : /* No clobbers */); \
11348 result; \
11351 #define vshrn_n_u16(a, b) \
11352 __extension__ \
11353 ({ \
11354 uint16x8_t a_ = (a); \
11355 uint8x8_t result; \
11356 __asm__ ("shrn %0.8b,%1.8h,%2" \
11357 : "=w"(result) \
11358 : "w"(a_), "i"(b) \
11359 : /* No clobbers */); \
11360 result; \
11363 #define vshrn_n_u32(a, b) \
11364 __extension__ \
11365 ({ \
11366 uint32x4_t a_ = (a); \
11367 uint16x4_t result; \
11368 __asm__ ("shrn %0.4h,%1.4s,%2" \
11369 : "=w"(result) \
11370 : "w"(a_), "i"(b) \
11371 : /* No clobbers */); \
11372 result; \
11375 #define vshrn_n_u64(a, b) \
11376 __extension__ \
11377 ({ \
11378 uint64x2_t a_ = (a); \
11379 uint32x2_t result; \
11380 __asm__ ("shrn %0.2s,%1.2d,%2" \
11381 : "=w"(result) \
11382 : "w"(a_), "i"(b) \
11383 : /* No clobbers */); \
11384 result; \
11387 #define vsli_n_p8(a, b, c) \
11388 __extension__ \
11389 ({ \
11390 poly8x8_t b_ = (b); \
11391 poly8x8_t a_ = (a); \
11392 poly8x8_t result; \
11393 __asm__ ("sli %0.8b,%2.8b,%3" \
11394 : "=w"(result) \
11395 : "0"(a_), "w"(b_), "i"(c) \
11396 : /* No clobbers */); \
11397 result; \
11400 #define vsli_n_p16(a, b, c) \
11401 __extension__ \
11402 ({ \
11403 poly16x4_t b_ = (b); \
11404 poly16x4_t a_ = (a); \
11405 poly16x4_t result; \
11406 __asm__ ("sli %0.4h,%2.4h,%3" \
11407 : "=w"(result) \
11408 : "0"(a_), "w"(b_), "i"(c) \
11409 : /* No clobbers */); \
11410 result; \
11413 #define vsliq_n_p8(a, b, c) \
11414 __extension__ \
11415 ({ \
11416 poly8x16_t b_ = (b); \
11417 poly8x16_t a_ = (a); \
11418 poly8x16_t result; \
11419 __asm__ ("sli %0.16b,%2.16b,%3" \
11420 : "=w"(result) \
11421 : "0"(a_), "w"(b_), "i"(c) \
11422 : /* No clobbers */); \
11423 result; \
11426 #define vsliq_n_p16(a, b, c) \
11427 __extension__ \
11428 ({ \
11429 poly16x8_t b_ = (b); \
11430 poly16x8_t a_ = (a); \
11431 poly16x8_t result; \
11432 __asm__ ("sli %0.8h,%2.8h,%3" \
11433 : "=w"(result) \
11434 : "0"(a_), "w"(b_), "i"(c) \
11435 : /* No clobbers */); \
11436 result; \
11439 #define vsri_n_p8(a, b, c) \
11440 __extension__ \
11441 ({ \
11442 poly8x8_t b_ = (b); \
11443 poly8x8_t a_ = (a); \
11444 poly8x8_t result; \
11445 __asm__ ("sri %0.8b,%2.8b,%3" \
11446 : "=w"(result) \
11447 : "0"(a_), "w"(b_), "i"(c) \
11448 : /* No clobbers */); \
11449 result; \
11452 #define vsri_n_p16(a, b, c) \
11453 __extension__ \
11454 ({ \
11455 poly16x4_t b_ = (b); \
11456 poly16x4_t a_ = (a); \
11457 poly16x4_t result; \
11458 __asm__ ("sri %0.4h,%2.4h,%3" \
11459 : "=w"(result) \
11460 : "0"(a_), "w"(b_), "i"(c) \
11461 : /* No clobbers */); \
11462 result; \
11465 #define vsriq_n_p8(a, b, c) \
11466 __extension__ \
11467 ({ \
11468 poly8x16_t b_ = (b); \
11469 poly8x16_t a_ = (a); \
11470 poly8x16_t result; \
11471 __asm__ ("sri %0.16b,%2.16b,%3" \
11472 : "=w"(result) \
11473 : "0"(a_), "w"(b_), "i"(c) \
11474 : /* No clobbers */); \
11475 result; \
11478 #define vsriq_n_p16(a, b, c) \
11479 __extension__ \
11480 ({ \
11481 poly16x8_t b_ = (b); \
11482 poly16x8_t a_ = (a); \
11483 poly16x8_t result; \
11484 __asm__ ("sri %0.8h,%2.8h,%3" \
11485 : "=w"(result) \
11486 : "0"(a_), "w"(b_), "i"(c) \
11487 : /* No clobbers */); \
11488 result; \
11491 #define vst1_lane_f32(a, b, c) \
11492 __extension__ \
11493 ({ \
11494 float32x2_t b_ = (b); \
11495 float32_t * a_ = (a); \
11496 __asm__ ("st1 {%1.s}[%2],[%0]" \
11498 : "r"(a_), "w"(b_), "i"(c) \
11499 : "memory"); \
11502 #define vst1_lane_f64(a, b, c) \
11503 __extension__ \
11504 ({ \
11505 float64x1_t b_ = (b); \
11506 float64_t * a_ = (a); \
11507 __asm__ ("st1 {%1.d}[%2],[%0]" \
11509 : "r"(a_), "w"(b_), "i"(c) \
11510 : "memory"); \
11513 #define vst1_lane_p8(a, b, c) \
11514 __extension__ \
11515 ({ \
11516 poly8x8_t b_ = (b); \
11517 poly8_t * a_ = (a); \
11518 __asm__ ("st1 {%1.b}[%2],[%0]" \
11520 : "r"(a_), "w"(b_), "i"(c) \
11521 : "memory"); \
11524 #define vst1_lane_p16(a, b, c) \
11525 __extension__ \
11526 ({ \
11527 poly16x4_t b_ = (b); \
11528 poly16_t * a_ = (a); \
11529 __asm__ ("st1 {%1.h}[%2],[%0]" \
11531 : "r"(a_), "w"(b_), "i"(c) \
11532 : "memory"); \
11535 #define vst1_lane_s8(a, b, c) \
11536 __extension__ \
11537 ({ \
11538 int8x8_t b_ = (b); \
11539 int8_t * a_ = (a); \
11540 __asm__ ("st1 {%1.b}[%2],[%0]" \
11542 : "r"(a_), "w"(b_), "i"(c) \
11543 : "memory"); \
11546 #define vst1_lane_s16(a, b, c) \
11547 __extension__ \
11548 ({ \
11549 int16x4_t b_ = (b); \
11550 int16_t * a_ = (a); \
11551 __asm__ ("st1 {%1.h}[%2],[%0]" \
11553 : "r"(a_), "w"(b_), "i"(c) \
11554 : "memory"); \
11557 #define vst1_lane_s32(a, b, c) \
11558 __extension__ \
11559 ({ \
11560 int32x2_t b_ = (b); \
11561 int32_t * a_ = (a); \
11562 __asm__ ("st1 {%1.s}[%2],[%0]" \
11564 : "r"(a_), "w"(b_), "i"(c) \
11565 : "memory"); \
11568 #define vst1_lane_s64(a, b, c) \
11569 __extension__ \
11570 ({ \
11571 int64x1_t b_ = (b); \
11572 int64_t * a_ = (a); \
11573 __asm__ ("st1 {%1.d}[%2],[%0]" \
11575 : "r"(a_), "w"(b_), "i"(c) \
11576 : "memory"); \
11579 #define vst1_lane_u8(a, b, c) \
11580 __extension__ \
11581 ({ \
11582 uint8x8_t b_ = (b); \
11583 uint8_t * a_ = (a); \
11584 __asm__ ("st1 {%1.b}[%2],[%0]" \
11586 : "r"(a_), "w"(b_), "i"(c) \
11587 : "memory"); \
11590 #define vst1_lane_u16(a, b, c) \
11591 __extension__ \
11592 ({ \
11593 uint16x4_t b_ = (b); \
11594 uint16_t * a_ = (a); \
11595 __asm__ ("st1 {%1.h}[%2],[%0]" \
11597 : "r"(a_), "w"(b_), "i"(c) \
11598 : "memory"); \
11601 #define vst1_lane_u32(a, b, c) \
11602 __extension__ \
11603 ({ \
11604 uint32x2_t b_ = (b); \
11605 uint32_t * a_ = (a); \
11606 __asm__ ("st1 {%1.s}[%2],[%0]" \
11608 : "r"(a_), "w"(b_), "i"(c) \
11609 : "memory"); \
11612 #define vst1_lane_u64(a, b, c) \
11613 __extension__ \
11614 ({ \
11615 uint64x1_t b_ = (b); \
11616 uint64_t * a_ = (a); \
11617 __asm__ ("st1 {%1.d}[%2],[%0]" \
11619 : "r"(a_), "w"(b_), "i"(c) \
11620 : "memory"); \
11624 #define vst1q_lane_f32(a, b, c) \
11625 __extension__ \
11626 ({ \
11627 float32x4_t b_ = (b); \
11628 float32_t * a_ = (a); \
11629 __asm__ ("st1 {%1.s}[%2],[%0]" \
11631 : "r"(a_), "w"(b_), "i"(c) \
11632 : "memory"); \
11635 #define vst1q_lane_f64(a, b, c) \
11636 __extension__ \
11637 ({ \
11638 float64x2_t b_ = (b); \
11639 float64_t * a_ = (a); \
11640 __asm__ ("st1 {%1.d}[%2],[%0]" \
11642 : "r"(a_), "w"(b_), "i"(c) \
11643 : "memory"); \
11646 #define vst1q_lane_p8(a, b, c) \
11647 __extension__ \
11648 ({ \
11649 poly8x16_t b_ = (b); \
11650 poly8_t * a_ = (a); \
11651 __asm__ ("st1 {%1.b}[%2],[%0]" \
11653 : "r"(a_), "w"(b_), "i"(c) \
11654 : "memory"); \
11657 #define vst1q_lane_p16(a, b, c) \
11658 __extension__ \
11659 ({ \
11660 poly16x8_t b_ = (b); \
11661 poly16_t * a_ = (a); \
11662 __asm__ ("st1 {%1.h}[%2],[%0]" \
11664 : "r"(a_), "w"(b_), "i"(c) \
11665 : "memory"); \
11668 #define vst1q_lane_s8(a, b, c) \
11669 __extension__ \
11670 ({ \
11671 int8x16_t b_ = (b); \
11672 int8_t * a_ = (a); \
11673 __asm__ ("st1 {%1.b}[%2],[%0]" \
11675 : "r"(a_), "w"(b_), "i"(c) \
11676 : "memory"); \
11679 #define vst1q_lane_s16(a, b, c) \
11680 __extension__ \
11681 ({ \
11682 int16x8_t b_ = (b); \
11683 int16_t * a_ = (a); \
11684 __asm__ ("st1 {%1.h}[%2],[%0]" \
11686 : "r"(a_), "w"(b_), "i"(c) \
11687 : "memory"); \
11690 #define vst1q_lane_s32(a, b, c) \
11691 __extension__ \
11692 ({ \
11693 int32x4_t b_ = (b); \
11694 int32_t * a_ = (a); \
11695 __asm__ ("st1 {%1.s}[%2],[%0]" \
11697 : "r"(a_), "w"(b_), "i"(c) \
11698 : "memory"); \
11701 #define vst1q_lane_s64(a, b, c) \
11702 __extension__ \
11703 ({ \
11704 int64x2_t b_ = (b); \
11705 int64_t * a_ = (a); \
11706 __asm__ ("st1 {%1.d}[%2],[%0]" \
11708 : "r"(a_), "w"(b_), "i"(c) \
11709 : "memory"); \
11712 #define vst1q_lane_u8(a, b, c) \
11713 __extension__ \
11714 ({ \
11715 uint8x16_t b_ = (b); \
11716 uint8_t * a_ = (a); \
11717 __asm__ ("st1 {%1.b}[%2],[%0]" \
11719 : "r"(a_), "w"(b_), "i"(c) \
11720 : "memory"); \
11723 #define vst1q_lane_u16(a, b, c) \
11724 __extension__ \
11725 ({ \
11726 uint16x8_t b_ = (b); \
11727 uint16_t * a_ = (a); \
11728 __asm__ ("st1 {%1.h}[%2],[%0]" \
11730 : "r"(a_), "w"(b_), "i"(c) \
11731 : "memory"); \
11734 #define vst1q_lane_u32(a, b, c) \
11735 __extension__ \
11736 ({ \
11737 uint32x4_t b_ = (b); \
11738 uint32_t * a_ = (a); \
11739 __asm__ ("st1 {%1.s}[%2],[%0]" \
11741 : "r"(a_), "w"(b_), "i"(c) \
11742 : "memory"); \
11745 #define vst1q_lane_u64(a, b, c) \
11746 __extension__ \
11747 ({ \
11748 uint64x2_t b_ = (b); \
11749 uint64_t * a_ = (a); \
11750 __asm__ ("st1 {%1.d}[%2],[%0]" \
11752 : "r"(a_), "w"(b_), "i"(c) \
11753 : "memory"); \
/* vsubhn_high_<type> (a, b, c): subtract-high-narrow into the upper half.
   SUBHN2 computes B - C, keeps the most-significant half of each lane,
   and writes the narrowed lanes into the upper 64 bits of the result;
   the lower 64 bits are the vector A passed in (pre-seeded here via
   vcombine with a zero upper half, then overwritten by the asm).
   Temporary inline-asm implementation.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
/* vsubhn_<type> (a, b): subtract-high-narrow.  SUBHN computes A - B and
   returns the most-significant half of each lane as a half-width,
   64-bit vector.  Temporary inline-asm implementation.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsubhn_s16 (int16x8_t a, int16x8_t b)
  int8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsubhn_s32 (int32x4_t a, int32x4_t b)
  int16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsubhn_s64 (int64x2_t a, int64x2_t b)
  int32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsubhn_u16 (uint16x8_t a, uint16x8_t b)
  uint8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsubhn_u32 (uint32x4_t a, uint32x4_t b)
  uint16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsubhn_u64 (uint64x2_t a, uint64x2_t b)
  uint32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
/* vtst{q}_p8/p16 (a, b): polynomial-vector bit test.  CMTST sets each
   result lane to all ones when (a & b) is non-zero in that lane, and
   to zero otherwise.  Temporary inline-asm implementation.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_p8 (poly8x8_t a, poly8x8_t b)
  uint8x8_t result;
  __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_p16 (poly16x4_t a, poly16x4_t b)
  uint16x4_t result;
  __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_p8 (poly8x16_t a, poly8x16_t b)
  uint8x16_t result;
  __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_p16 (poly16x8_t a, poly16x8_t b)
  uint16x8_t result;
  __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
11932 /* End of temporary inline asm implementations. */
11934 /* Start of temporary inline asm for vldn, vstn and friends. */
11936 /* Create struct element types for duplicating loads.
11938 Create 2 element structures of:
11940 +------+----+----+----+----+
11941 | | 8 | 16 | 32 | 64 |
11942 +------+----+----+----+----+
11943 |int | Y | Y | N | N |
11944 +------+----+----+----+----+
11945 |uint | Y | Y | N | N |
11946 +------+----+----+----+----+
11947 |float | - | - | N | N |
11948 +------+----+----+----+----+
11949 |poly | Y | Y | - | - |
11950 +------+----+----+----+----+
11952 Create 3 element structures of:
11954 +------+----+----+----+----+
11955 | | 8 | 16 | 32 | 64 |
11956 +------+----+----+----+----+
11957 |int | Y | Y | Y | Y |
11958 +------+----+----+----+----+
11959 |uint | Y | Y | Y | Y |
11960 +------+----+----+----+----+
11961 |float | - | - | Y | Y |
11962 +------+----+----+----+----+
11963 |poly | Y | Y | - | - |
11964 +------+----+----+----+----+
11966 Create 4 element structures of:
11968 +------+----+----+----+----+
11969 | | 8 | 16 | 32 | 64 |
11970 +------+----+----+----+----+
11971 |int | Y | N | N | Y |
11972 +------+----+----+----+----+
11973 |uint | Y | N | N | Y |
11974 +------+----+----+----+----+
11975 |float | - | - | N | Y |
11976 +------+----+----+----+----+
11977 |poly | Y | N | - | - |
11978 +------+----+----+----+----+
   These struct definitions are required for casting memory references
   to correctly sized/typed operands.  */
/* __STRUCTN (t, sz, nelem): define a plain struct type
   't<sz>x<nelem>_t' holding NELEM scalar elements of type 't<sz>_t'.
   These types exist only so the ldN/stN inline asm below can form a
   memory operand of the exact size the instruction reads; see the
   tables above for which combinations are needed (combinations not
   listed reuse an existing vector type of the same size instead).  */
#define __STRUCTN(t, sz, nelem) \
  typedef struct t ## sz ## x ## nelem ## _t { \
    t ## sz ## _t val[nelem]; \
  }  t ## sz ## x ## nelem ## _t;

/* 2-element structs.  */
__STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs.  */
__STRUCTN (int, 8, 3)
__STRUCTN (int, 16, 3)
__STRUCTN (int, 32, 3)
__STRUCTN (int, 64, 3)
__STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
__STRUCTN (poly, 16, 3)
/* 4-element structs.  */
__STRUCTN (int, 8, 4)
__STRUCTN (int, 64, 4)
__STRUCTN (uint, 8, 4)
__STRUCTN (uint, 64, 4)
__STRUCTN (poly, 8, 4)
__STRUCTN (float, 64, 4)
/* __STRUCTN is an implementation detail; keep it out of user code.  */
#undef __STRUCTN
/* __LD2R_FUNC: generate vld2{Q}_dup_<funcsuffix>, a load that
   replicates a 2-element interleaved structure from *PTR into all
   lanes.  The asm LD2R-loads into the fixed registers v16/v17 and then
   ST1-stores them into RESULT.  STRUCTTYPE exists only to give the
   input "Q" memory operand the exact size LD2R reads.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype, \
                    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
    rettype result; \
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
             "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
             : "=Q"(result) \
             : "Q"(*(const structtype *)ptr) \
             : "memory", "v16", "v17"); \
    return result; \

/* Instantiations.  Where no 2-element __STRUCTN type was created
   (32- and 64-bit elements — see the tables above), a same-sized
   vector type serves as the memory-operand type instead.  */
__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
/* __LD2_LANE_FUNC: generate vld2{Q}_lane_<funcsuffix> (ptr, b, c).
   The asm LD1-loads the incoming structure B into the fixed registers
   v16/v17, LD2-overwrites lane C of both from *PTR, then ST1-stores
   v16/v17 into RESULT.  C must be a compile-time constant ("i").  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
                        lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
                                     rettype b, const int c) \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
             "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
             "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
             : "=Q"(result) \
             : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
             : "memory", "v16", "v17"); \
    return result; \
12072 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
/* vld2{q}_lane_* instantiations (return type, element pointer type,
   full-register arrangement, lane size suffix, name suffix, Q).  */
__LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
__LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
__LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
__LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
__LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
__LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
__LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
__LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
__LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
__LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
__LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
__LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
__LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
__LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
__LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
__LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
__LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
__LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
__LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
__LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
__LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
__LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
__LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* __LD3R_FUNC: generate vld3{Q}_dup_<funcsuffix>, replicating a
   3-element interleaved structure from *PTR into all lanes via LD3R
   into the fixed registers v16-v18, then ST1 into RESULT.  STRUCTTYPE
   only sizes the "Q" memory operand.  */
#define __LD3R_FUNC(rettype, structtype, ptrtype, \
                    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
    rettype result; \
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
             "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
             : "=Q"(result) \
             : "Q"(*(const structtype *)ptr) \
             : "memory", "v16", "v17", "v18"); \
    return result; \

/* Instantiations; 3-element __STRUCTN types exist for every element
   size here (see the tables above).  */
__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
/* __LD3_LANE_FUNC: generate vld3{Q}_lane_<funcsuffix> (ptr, b, c).
   LD1-loads B into v16-v18, LD3-overwrites lane C of each from *PTR,
   then ST1-stores back into RESULT.  C must be a constant ("i").  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
                        lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
                                     rettype b, const int c) \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
             "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
             "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
             : "=Q"(result) \
             : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
             : "memory", "v16", "v17", "v18"); \
    return result; \
12154 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
/* vld3{q}_lane_* instantiations (return type, element pointer type,
   full-register arrangement, lane size suffix, name suffix, Q).  */
__LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
__LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
__LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
__LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
__LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
__LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
__LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
__LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
__LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
__LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
__LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
__LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
__LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
__LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
__LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
__LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
__LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
__LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
__LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
__LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
__LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
__LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
__LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* __LD4R_FUNC: generate vld4{Q}_dup_<funcsuffix>, replicating a
   4-element interleaved structure from *PTR into all lanes via LD4R
   into the fixed registers v16-v19, then ST1 into RESULT.  STRUCTTYPE
   only sizes the "Q" memory operand.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype, \
                    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
    rettype result; \
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
             "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
             : "=Q"(result) \
             : "Q"(*(const structtype *)ptr) \
             : "memory", "v16", "v17", "v18", "v19"); \
    return result; \

/* Instantiations.  Where no 4-element __STRUCTN type was created
   (16- and 32-bit elements — see the tables above), a same-sized
   vector type serves as the memory-operand type instead.  */
__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* __LD4_LANE_FUNC: generate vld4{Q}_lane_<funcsuffix> (ptr, b, c).
   LD1-loads B into v16-v19, LD4-overwrites lane C of each from *PTR,
   then ST1-stores back into RESULT.  C must be a constant ("i").  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
                        lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
                                     rettype b, const int c) \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
             "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
             "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
             : "=Q"(result) \
             : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
             : "memory", "v16", "v17", "v18", "v19"); \
    return result; \
12236 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
12237 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
12238 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
12239 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
12240 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
12241 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
12242 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
12243 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
12244 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
12245 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
12246 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
12247 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
12248 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
12249 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
12250 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
12251 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
12252 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
12253 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
12254 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
12255 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
12256 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
12257 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
12258 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
12259 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
12261 #define __ST2_LANE_FUNC(intype, largetype, ptrtype, \
12262 mode, ptr_mode, funcsuffix, signedtype) \
12263 __extension__ static __inline void \
12264 __attribute__ ((__always_inline__)) \
12265 vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
12266 intype __b, const int __c) \
12268 __builtin_aarch64_simd_oi __o; \
12269 largetype __temp; \
12270 __temp.val[0] \
12271 = vcombine_##funcsuffix (__b.val[0], \
12272 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12273 __temp.val[1] \
12274 = vcombine_##funcsuffix (__b.val[1], \
12275 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12276 __o = __builtin_aarch64_set_qregoi##mode (__o, \
12277 (signedtype) __temp.val[0], 0); \
12278 __o = __builtin_aarch64_set_qregoi##mode (__o, \
12279 (signedtype) __temp.val[1], 1); \
12280 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
12281 __ptr, __o, __c); \
12284 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
12285 float32x4_t)
12286 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
12287 float64x2_t)
12288 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
12289 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
12290 int16x8_t)
12291 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
12292 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
12293 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
12294 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
12295 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
12296 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
12297 int16x8_t)
12298 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
12299 int32x4_t)
12300 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
12301 int64x2_t)
12303 #undef __ST2_LANE_FUNC
12304 #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
12305 __extension__ static __inline void \
12306 __attribute__ ((__always_inline__)) \
12307 vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
12308 intype __b, const int __c) \
12310 union { intype __i; \
12311 __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
12312 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
12313 __ptr, __temp.__o, __c); \
12316 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
12317 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
12318 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
12319 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
12320 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
12321 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
12322 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
12323 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
12324 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
12325 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
12326 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
12327 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
12329 #define __ST3_LANE_FUNC(intype, largetype, ptrtype, \
12330 mode, ptr_mode, funcsuffix, signedtype) \
12331 __extension__ static __inline void \
12332 __attribute__ ((__always_inline__)) \
12333 vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
12334 intype __b, const int __c) \
12336 __builtin_aarch64_simd_ci __o; \
12337 largetype __temp; \
12338 __temp.val[0] \
12339 = vcombine_##funcsuffix (__b.val[0], \
12340 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12341 __temp.val[1] \
12342 = vcombine_##funcsuffix (__b.val[1], \
12343 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12344 __temp.val[2] \
12345 = vcombine_##funcsuffix (__b.val[2], \
12346 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12347 __o = __builtin_aarch64_set_qregci##mode (__o, \
12348 (signedtype) __temp.val[0], 0); \
12349 __o = __builtin_aarch64_set_qregci##mode (__o, \
12350 (signedtype) __temp.val[1], 1); \
12351 __o = __builtin_aarch64_set_qregci##mode (__o, \
12352 (signedtype) __temp.val[2], 2); \
12353 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
12354 __ptr, __o, __c); \
12357 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
12358 float32x4_t)
12359 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
12360 float64x2_t)
12361 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
12362 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
12363 int16x8_t)
12364 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
12365 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
12366 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
12367 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
12368 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
12369 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
12370 int16x8_t)
12371 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
12372 int32x4_t)
12373 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
12374 int64x2_t)
12376 #undef __ST3_LANE_FUNC
12377 #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
12378 __extension__ static __inline void \
12379 __attribute__ ((__always_inline__)) \
12380 vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
12381 intype __b, const int __c) \
12383 union { intype __i; \
12384 __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
12385 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
12386 __ptr, __temp.__o, __c); \
12389 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
12390 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
12391 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
12392 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
12393 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
12394 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
12395 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
12396 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
12397 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
12398 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
12399 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
12400 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
12402 #define __ST4_LANE_FUNC(intype, largetype, ptrtype, \
12403 mode, ptr_mode, funcsuffix, signedtype) \
12404 __extension__ static __inline void \
12405 __attribute__ ((__always_inline__)) \
12406 vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
12407 intype __b, const int __c) \
12409 __builtin_aarch64_simd_xi __o; \
12410 largetype __temp; \
12411 __temp.val[0] \
12412 = vcombine_##funcsuffix (__b.val[0], \
12413 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12414 __temp.val[1] \
12415 = vcombine_##funcsuffix (__b.val[1], \
12416 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12417 __temp.val[2] \
12418 = vcombine_##funcsuffix (__b.val[2], \
12419 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12420 __temp.val[3] \
12421 = vcombine_##funcsuffix (__b.val[3], \
12422 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
12423 __o = __builtin_aarch64_set_qregxi##mode (__o, \
12424 (signedtype) __temp.val[0], 0); \
12425 __o = __builtin_aarch64_set_qregxi##mode (__o, \
12426 (signedtype) __temp.val[1], 1); \
12427 __o = __builtin_aarch64_set_qregxi##mode (__o, \
12428 (signedtype) __temp.val[2], 2); \
12429 __o = __builtin_aarch64_set_qregxi##mode (__o, \
12430 (signedtype) __temp.val[3], 3); \
12431 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
12432 __ptr, __o, __c); \
12435 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
12436 float32x4_t)
12437 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
12438 float64x2_t)
12439 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
12440 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
12441 int16x8_t)
12442 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
12443 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
12444 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
12445 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
12446 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
12447 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
12448 int16x8_t)
12449 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
12450 int32x4_t)
12451 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
12452 int64x2_t)
12454 #undef __ST4_LANE_FUNC
12455 #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
12456 __extension__ static __inline void \
12457 __attribute__ ((__always_inline__)) \
12458 vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
12459 intype __b, const int __c) \
12461 union { intype __i; \
12462 __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
12463 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
12464 __ptr, __temp.__o, __c); \
12467 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
12468 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
12469 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
12470 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
12471 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
12472 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
12473 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
12474 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
12475 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
12476 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
12477 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
12478 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
12480 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
12481 vaddlv_s32 (int32x2_t a)
12483 int64_t result;
12484 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
12485 return result;
12488 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12489 vaddlv_u32 (uint32x2_t a)
12491 uint64_t result;
12492 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
12493 return result;
12496 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12497 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12499 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
12502 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12503 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12505 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
12508 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12509 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12511 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
12514 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12515 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12517 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
12520 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12521 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12523 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
12526 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12527 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12529 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
12532 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12533 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12535 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
12538 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12539 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12541 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
12544 /* Table intrinsics. */
12546 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12547 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
12549 poly8x8_t result;
12550 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12551 : "=w"(result)
12552 : "w"(a), "w"(b)
12553 : /* No clobbers */);
12554 return result;
12557 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12558 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
12560 int8x8_t result;
12561 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12562 : "=w"(result)
12563 : "w"(a), "w"(b)
12564 : /* No clobbers */);
12565 return result;
12568 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12569 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
12571 uint8x8_t result;
12572 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12573 : "=w"(result)
12574 : "w"(a), "w"(b)
12575 : /* No clobbers */);
12576 return result;
12579 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12580 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
12582 poly8x16_t result;
12583 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12584 : "=w"(result)
12585 : "w"(a), "w"(b)
12586 : /* No clobbers */);
12587 return result;
12590 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12591 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
12593 int8x16_t result;
12594 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12595 : "=w"(result)
12596 : "w"(a), "w"(b)
12597 : /* No clobbers */);
12598 return result;
12601 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12602 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
12604 uint8x16_t result;
12605 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12606 : "=w"(result)
12607 : "w"(a), "w"(b)
12608 : /* No clobbers */);
12609 return result;
12612 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12613 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
12615 int8x8_t result;
12616 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12617 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12618 :"=w"(result)
12619 :"Q"(tab),"w"(idx)
12620 :"memory", "v16", "v17");
12621 return result;
12624 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12625 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
12627 uint8x8_t result;
12628 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12629 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12630 :"=w"(result)
12631 :"Q"(tab),"w"(idx)
12632 :"memory", "v16", "v17");
12633 return result;
12636 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12637 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
12639 poly8x8_t result;
12640 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12641 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12642 :"=w"(result)
12643 :"Q"(tab),"w"(idx)
12644 :"memory", "v16", "v17");
12645 return result;
12648 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12649 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
12651 int8x16_t result;
12652 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12653 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12654 :"=w"(result)
12655 :"Q"(tab),"w"(idx)
12656 :"memory", "v16", "v17");
12657 return result;
12660 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12661 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
12663 uint8x16_t result;
12664 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12665 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12666 :"=w"(result)
12667 :"Q"(tab),"w"(idx)
12668 :"memory", "v16", "v17");
12669 return result;
12672 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12673 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
12675 poly8x16_t result;
12676 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12677 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12678 :"=w"(result)
12679 :"Q"(tab),"w"(idx)
12680 :"memory", "v16", "v17");
12681 return result;
12684 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12685 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
12687 int8x8_t result;
12688 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12689 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12690 :"=w"(result)
12691 :"Q"(tab),"w"(idx)
12692 :"memory", "v16", "v17", "v18");
12693 return result;
12696 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12697 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
12699 uint8x8_t result;
12700 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12701 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12702 :"=w"(result)
12703 :"Q"(tab),"w"(idx)
12704 :"memory", "v16", "v17", "v18");
12705 return result;
12708 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12709 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
12711 poly8x8_t result;
12712 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12713 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12714 :"=w"(result)
12715 :"Q"(tab),"w"(idx)
12716 :"memory", "v16", "v17", "v18");
12717 return result;
12720 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12721 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
12723 int8x16_t result;
12724 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12725 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12726 :"=w"(result)
12727 :"Q"(tab),"w"(idx)
12728 :"memory", "v16", "v17", "v18");
12729 return result;
12732 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12733 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
12735 uint8x16_t result;
12736 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12737 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12738 :"=w"(result)
12739 :"Q"(tab),"w"(idx)
12740 :"memory", "v16", "v17", "v18");
12741 return result;
12744 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12745 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
12747 poly8x16_t result;
12748 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12749 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12750 :"=w"(result)
12751 :"Q"(tab),"w"(idx)
12752 :"memory", "v16", "v17", "v18");
12753 return result;
12756 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12757 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
12759 int8x8_t result;
12760 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12761 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12762 :"=w"(result)
12763 :"Q"(tab),"w"(idx)
12764 :"memory", "v16", "v17", "v18", "v19");
12765 return result;
12768 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12769 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
12771 uint8x8_t result;
12772 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12773 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12774 :"=w"(result)
12775 :"Q"(tab),"w"(idx)
12776 :"memory", "v16", "v17", "v18", "v19");
12777 return result;
12780 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12781 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
12783 poly8x8_t result;
12784 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12785 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12786 :"=w"(result)
12787 :"Q"(tab),"w"(idx)
12788 :"memory", "v16", "v17", "v18", "v19");
12789 return result;
12793 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12794 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
12796 int8x16_t result;
12797 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12798 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12799 :"=w"(result)
12800 :"Q"(tab),"w"(idx)
12801 :"memory", "v16", "v17", "v18", "v19");
12802 return result;
12805 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12806 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
12808 uint8x16_t result;
12809 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12810 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12811 :"=w"(result)
12812 :"Q"(tab),"w"(idx)
12813 :"memory", "v16", "v17", "v18", "v19");
12814 return result;
12817 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12818 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
12820 poly8x16_t result;
12821 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12822 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12823 :"=w"(result)
12824 :"Q"(tab),"w"(idx)
12825 :"memory", "v16", "v17", "v18", "v19");
12826 return result;
12830 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12831 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
12833 int8x8_t result = r;
12834 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12835 : "+w"(result)
12836 : "w"(tab), "w"(idx)
12837 : /* No clobbers */);
12838 return result;
12841 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12842 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
12844 uint8x8_t result = r;
12845 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12846 : "+w"(result)
12847 : "w"(tab), "w"(idx)
12848 : /* No clobbers */);
12849 return result;
12852 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12853 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
12855 poly8x8_t result = r;
12856 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12857 : "+w"(result)
12858 : "w"(tab), "w"(idx)
12859 : /* No clobbers */);
12860 return result;
12863 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12864 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
12866 int8x16_t result = r;
12867 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12868 : "+w"(result)
12869 : "w"(tab), "w"(idx)
12870 : /* No clobbers */);
12871 return result;
12874 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12875 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
12877 uint8x16_t result = r;
12878 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12879 : "+w"(result)
12880 : "w"(tab), "w"(idx)
12881 : /* No clobbers */);
12882 return result;
12885 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12886 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
12888 poly8x16_t result = r;
12889 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12890 : "+w"(result)
12891 : "w"(tab), "w"(idx)
12892 : /* No clobbers */);
12893 return result;
12896 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12897 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
12899 int8x8_t result = r;
12900 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12901 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12902 :"+w"(result)
12903 :"Q"(tab),"w"(idx)
12904 :"memory", "v16", "v17");
12905 return result;
12908 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12909 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
12911 uint8x8_t result = r;
12912 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12913 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12914 :"+w"(result)
12915 :"Q"(tab),"w"(idx)
12916 :"memory", "v16", "v17");
12917 return result;
12920 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12921 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
12923 poly8x8_t result = r;
12924 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12925 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12926 :"+w"(result)
12927 :"Q"(tab),"w"(idx)
12928 :"memory", "v16", "v17");
12929 return result;
12933 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12934 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
12936 int8x16_t result = r;
12937 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12938 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12939 :"+w"(result)
12940 :"Q"(tab),"w"(idx)
12941 :"memory", "v16", "v17");
12942 return result;
12945 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12946 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
12948 uint8x16_t result = r;
12949 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12950 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12951 :"+w"(result)
12952 :"Q"(tab),"w"(idx)
12953 :"memory", "v16", "v17");
12954 return result;
12957 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12958 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
12960 poly8x16_t result = r;
12961 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12962 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12963 :"+w"(result)
12964 :"Q"(tab),"w"(idx)
12965 :"memory", "v16", "v17");
12966 return result;
12970 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12971 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
12973 int8x8_t result = r;
12974 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12975 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12976 :"+w"(result)
12977 :"Q"(tab),"w"(idx)
12978 :"memory", "v16", "v17", "v18");
12979 return result;
12982 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12983 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
12985 uint8x8_t result = r;
12986 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12987 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12988 :"+w"(result)
12989 :"Q"(tab),"w"(idx)
12990 :"memory", "v16", "v17", "v18");
12991 return result;
12994 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12995 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
12997 poly8x8_t result = r;
12998 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12999 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
13000 :"+w"(result)
13001 :"Q"(tab),"w"(idx)
13002 :"memory", "v16", "v17", "v18");
13003 return result;
13007 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13008 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
13010 int8x16_t result = r;
13011 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13012 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
13013 :"+w"(result)
13014 :"Q"(tab),"w"(idx)
13015 :"memory", "v16", "v17", "v18");
13016 return result;
13019 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13020 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
13022 uint8x16_t result = r;
13023 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13024 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
13025 :"+w"(result)
13026 :"Q"(tab),"w"(idx)
13027 :"memory", "v16", "v17", "v18");
13028 return result;
13031 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13032 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
13034 poly8x16_t result = r;
13035 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
13036 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
13037 :"+w"(result)
13038 :"Q"(tab),"w"(idx)
13039 :"memory", "v16", "v17", "v18");
13040 return result;
13044 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13045 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
13047 int8x8_t result = r;
13048 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13049 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13050 :"+w"(result)
13051 :"Q"(tab),"w"(idx)
13052 :"memory", "v16", "v17", "v18", "v19");
13053 return result;
13056 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13057 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
13059 uint8x8_t result = r;
13060 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13061 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13062 :"+w"(result)
13063 :"Q"(tab),"w"(idx)
13064 :"memory", "v16", "v17", "v18", "v19");
13065 return result;
13068 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13069 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
13071 poly8x8_t result = r;
13072 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13073 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13074 :"+w"(result)
13075 :"Q"(tab),"w"(idx)
13076 :"memory", "v16", "v17", "v18", "v19");
13077 return result;
13081 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13082 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
13084 int8x16_t result = r;
13085 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13086 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13087 :"+w"(result)
13088 :"Q"(tab),"w"(idx)
13089 :"memory", "v16", "v17", "v18", "v19");
13090 return result;
13093 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13094 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
13096 uint8x16_t result = r;
13097 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13098 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13099 :"+w"(result)
13100 :"Q"(tab),"w"(idx)
13101 :"memory", "v16", "v17", "v18", "v19");
13102 return result;
13105 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13106 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
13108 poly8x16_t result = r;
13109 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13110 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13111 :"+w"(result)
13112 :"Q"(tab),"w"(idx)
13113 :"memory", "v16", "v17", "v18", "v19");
13114 return result;
13117 /* V7 legacy table intrinsics. */
13119 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13120 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
13122 int8x8_t result;
13123 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
13124 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13125 : "=w"(result)
13126 : "w"(temp), "w"(idx)
13127 : /* No clobbers */);
13128 return result;
13131 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13132 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
13134 uint8x8_t result;
13135 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
13136 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13137 : "=w"(result)
13138 : "w"(temp), "w"(idx)
13139 : /* No clobbers */);
13140 return result;
13143 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13144 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
13146 poly8x8_t result;
13147 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
13148 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13149 : "=w"(result)
13150 : "w"(temp), "w"(idx)
13151 : /* No clobbers */);
13152 return result;
13155 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13156 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
13158 int8x8_t result;
13159 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
13160 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13161 : "=w"(result)
13162 : "w"(temp), "w"(idx)
13163 : /* No clobbers */);
13164 return result;
13167 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13168 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
13170 uint8x8_t result;
13171 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
13172 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13173 : "=w"(result)
13174 : "w"(temp), "w"(idx)
13175 : /* No clobbers */);
13176 return result;
13179 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13180 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
13182 poly8x8_t result;
13183 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
13184 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13185 : "=w"(result)
13186 : "w"(temp), "w"(idx)
13187 : /* No clobbers */);
13188 return result;
13191 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13192 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
13194 int8x8_t result;
13195 int8x16x2_t temp;
13196 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13197 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
13198 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13199 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13200 : "=w"(result)
13201 : "Q"(temp), "w"(idx)
13202 : "v16", "v17", "memory");
13203 return result;
13206 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13207 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
13209 uint8x8_t result;
13210 uint8x16x2_t temp;
13211 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13212 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
13213 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13214 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13215 : "=w"(result)
13216 : "Q"(temp), "w"(idx)
13217 : "v16", "v17", "memory");
13218 return result;
13221 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13222 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
13224 poly8x8_t result;
13225 poly8x16x2_t temp;
13226 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13227 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
13228 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13229 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13230 : "=w"(result)
13231 : "Q"(temp), "w"(idx)
13232 : "v16", "v17", "memory");
13233 return result;
13236 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13237 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
13239 int8x8_t result;
13240 int8x16x2_t temp;
13241 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13242 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
13243 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13244 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13245 : "=w"(result)
13246 : "Q"(temp), "w"(idx)
13247 : "v16", "v17", "memory");
13248 return result;
13251 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13252 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
13254 uint8x8_t result;
13255 uint8x16x2_t temp;
13256 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13257 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
13258 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13259 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13260 : "=w"(result)
13261 : "Q"(temp), "w"(idx)
13262 : "v16", "v17", "memory");
13263 return result;
13266 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13267 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
13269 poly8x8_t result;
13270 poly8x16x2_t temp;
13271 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13272 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
13273 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13274 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13275 : "=w"(result)
13276 : "Q"(temp), "w"(idx)
13277 : "v16", "v17", "memory");
13278 return result;
13281 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13282 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
13284 int8x8_t result = r;
13285 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
13286 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13287 : "+w"(result)
13288 : "w"(temp), "w"(idx)
13289 : /* No clobbers */);
13290 return result;
13293 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13294 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
13296 uint8x8_t result = r;
13297 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
13298 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13299 : "+w"(result)
13300 : "w"(temp), "w"(idx)
13301 : /* No clobbers */);
13302 return result;
13305 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13306 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
13308 poly8x8_t result = r;
13309 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
13310 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13311 : "+w"(result)
13312 : "w"(temp), "w"(idx)
13313 : /* No clobbers */);
13314 return result;
13317 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13318 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
13320 int8x8_t result = r;
13321 int8x16x2_t temp;
13322 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13323 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
13324 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13325 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13326 : "+w"(result)
13327 : "Q"(temp), "w"(idx)
13328 : "v16", "v17", "memory");
13329 return result;
13332 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13333 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
13335 uint8x8_t result = r;
13336 uint8x16x2_t temp;
13337 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13338 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
13339 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13340 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13341 : "+w"(result)
13342 : "Q"(temp), "w"(idx)
13343 : "v16", "v17", "memory");
13344 return result;
13347 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13348 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
13350 poly8x8_t result = r;
13351 poly8x16x2_t temp;
13352 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13353 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
13354 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13355 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13356 : "+w"(result)
13357 : "Q"(temp), "w"(idx)
13358 : "v16", "v17", "memory");
13359 return result;
13362 /* End of temporary inline asm. */
13364 /* Start of optimal implementations in approved order. */
13366 /* vabs */
13368 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13369 vabs_f32 (float32x2_t __a)
13371 return __builtin_aarch64_absv2sf (__a);
13374 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13375 vabs_f64 (float64x1_t __a)
13377 return (float64x1_t) {__builtin_fabs (__a[0])};
13380 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13381 vabs_s8 (int8x8_t __a)
13383 return __builtin_aarch64_absv8qi (__a);
13386 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13387 vabs_s16 (int16x4_t __a)
13389 return __builtin_aarch64_absv4hi (__a);
13392 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13393 vabs_s32 (int32x2_t __a)
13395 return __builtin_aarch64_absv2si (__a);
13398 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13399 vabs_s64 (int64x1_t __a)
13401 return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
13404 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13405 vabsq_f32 (float32x4_t __a)
13407 return __builtin_aarch64_absv4sf (__a);
13410 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13411 vabsq_f64 (float64x2_t __a)
13413 return __builtin_aarch64_absv2df (__a);
13416 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13417 vabsq_s8 (int8x16_t __a)
13419 return __builtin_aarch64_absv16qi (__a);
13422 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13423 vabsq_s16 (int16x8_t __a)
13425 return __builtin_aarch64_absv8hi (__a);
13428 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13429 vabsq_s32 (int32x4_t __a)
13431 return __builtin_aarch64_absv4si (__a);
13434 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13435 vabsq_s64 (int64x2_t __a)
13437 return __builtin_aarch64_absv2di (__a);
13440 /* vadd */
/* vaddd_s64: add two signed 64-bit scalars (maps to a plain ADD).  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddd_s64 (int64_t __a, int64_t __b)
{
  return __a + __b;
}
/* vaddd_u64: add two unsigned 64-bit scalars (wraps modulo 2^64).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __a + __b;
}
13454 /* vaddv */
13456 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13457 vaddv_s8 (int8x8_t __a)
13459 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
13462 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13463 vaddv_s16 (int16x4_t __a)
13465 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
13468 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13469 vaddv_s32 (int32x2_t __a)
13471 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
13474 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13475 vaddv_u8 (uint8x8_t __a)
13477 return vget_lane_u8 ((uint8x8_t)
13478 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
13482 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13483 vaddv_u16 (uint16x4_t __a)
13485 return vget_lane_u16 ((uint16x4_t)
13486 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
13490 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13491 vaddv_u32 (uint32x2_t __a)
13493 return vget_lane_u32 ((uint32x2_t)
13494 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
13498 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13499 vaddvq_s8 (int8x16_t __a)
13501 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
13505 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13506 vaddvq_s16 (int16x8_t __a)
13508 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
13511 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13512 vaddvq_s32 (int32x4_t __a)
13514 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
13517 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13518 vaddvq_s64 (int64x2_t __a)
13520 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
13523 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13524 vaddvq_u8 (uint8x16_t __a)
13526 return vgetq_lane_u8 ((uint8x16_t)
13527 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
13531 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13532 vaddvq_u16 (uint16x8_t __a)
13534 return vgetq_lane_u16 ((uint16x8_t)
13535 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
13539 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13540 vaddvq_u32 (uint32x4_t __a)
13542 return vgetq_lane_u32 ((uint32x4_t)
13543 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
13547 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13548 vaddvq_u64 (uint64x2_t __a)
13550 return vgetq_lane_u64 ((uint64x2_t)
13551 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
13555 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13556 vaddv_f32 (float32x2_t __a)
13558 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
13559 return vget_lane_f32 (__t, 0);
13562 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13563 vaddvq_f32 (float32x4_t __a)
13565 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
13566 return vgetq_lane_f32 (__t, 0);
13569 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13570 vaddvq_f64 (float64x2_t __a)
13572 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
13573 return vgetq_lane_f64 (__t, 0);
13576 /* vbsl */
13578 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13579 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
13581 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
13584 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13585 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
13587 return (float64x1_t)
13588 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
13591 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13592 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
13594 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
13597 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13598 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
13600 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
13603 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13604 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
13606 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
13609 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13610 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
13612 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
13615 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13616 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
13618 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
13621 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13622 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
13624 return (int64x1_t)
13625 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
13628 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13629 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
13631 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
13634 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13635 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
13637 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
13640 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13641 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
13643 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
13646 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13647 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
13649 return (uint64x1_t)
13650 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
13653 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13654 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
13656 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
13659 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13660 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
13662 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
13665 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13666 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
13668 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
13671 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13672 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
13674 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
13677 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13678 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
13680 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
13683 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13684 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
13686 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
13689 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13690 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
13692 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
13695 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13696 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
13698 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
13701 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13702 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
13704 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
13707 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13708 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
13710 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
13713 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13714 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
13716 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
13719 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13720 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
13722 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vaes: AES single-round primitives, available only when the crypto
   extension is enabled (AESE/AESD include AddRoundKey; AESMC/AESIMC are
   the (inverse) MixColumns steps).  */

static __inline uint8x16_t
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

static __inline uint8x16_t
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}
#endif
13755 /* vcage */
13757 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13758 vcage_f64 (float64x1_t __a, float64x1_t __b)
13760 return vabs_f64 (__a) >= vabs_f64 (__b);
13763 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13764 vcages_f32 (float32_t __a, float32_t __b)
13766 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
13769 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13770 vcage_f32 (float32x2_t __a, float32x2_t __b)
13772 return vabs_f32 (__a) >= vabs_f32 (__b);
13775 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13776 vcageq_f32 (float32x4_t __a, float32x4_t __b)
13778 return vabsq_f32 (__a) >= vabsq_f32 (__b);
13781 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13782 vcaged_f64 (float64_t __a, float64_t __b)
13784 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
13787 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13788 vcageq_f64 (float64x2_t __a, float64x2_t __b)
13790 return vabsq_f64 (__a) >= vabsq_f64 (__b);
13793 /* vcagt */
13795 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13796 vcagts_f32 (float32_t __a, float32_t __b)
13798 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
13801 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13802 vcagt_f32 (float32x2_t __a, float32x2_t __b)
13804 return vabs_f32 (__a) > vabs_f32 (__b);
13807 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13808 vcagt_f64 (float64x1_t __a, float64x1_t __b)
13810 return vabs_f64 (__a) > vabs_f64 (__b);
13813 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13814 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
13816 return vabsq_f32 (__a) > vabsq_f32 (__b);
13819 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13820 vcagtd_f64 (float64_t __a, float64_t __b)
13822 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
13825 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13826 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
13828 return vabsq_f64 (__a) > vabsq_f64 (__b);
13831 /* vcale */
13833 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13834 vcale_f32 (float32x2_t __a, float32x2_t __b)
13836 return vabs_f32 (__a) <= vabs_f32 (__b);
13839 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13840 vcale_f64 (float64x1_t __a, float64x1_t __b)
13842 return vabs_f64 (__a) <= vabs_f64 (__b);
13845 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13846 vcaled_f64 (float64_t __a, float64_t __b)
13848 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
13851 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13852 vcales_f32 (float32_t __a, float32_t __b)
13854 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
13857 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13858 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
13860 return vabsq_f32 (__a) <= vabsq_f32 (__b);
13863 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13864 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
13866 return vabsq_f64 (__a) <= vabsq_f64 (__b);
13869 /* vcalt */
13871 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13872 vcalt_f32 (float32x2_t __a, float32x2_t __b)
13874 return vabs_f32 (__a) < vabs_f32 (__b);
13877 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13878 vcalt_f64 (float64x1_t __a, float64x1_t __b)
13880 return vabs_f64 (__a) < vabs_f64 (__b);
13883 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13884 vcaltd_f64 (float64_t __a, float64_t __b)
13886 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
13889 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13890 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
13892 return vabsq_f32 (__a) < vabsq_f32 (__b);
13895 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13896 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
13898 return vabsq_f64 (__a) < vabsq_f64 (__b);
13901 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13902 vcalts_f32 (float32_t __a, float32_t __b)
13904 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
13907 /* vceq - vector. */
13909 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13910 vceq_f32 (float32x2_t __a, float32x2_t __b)
13912 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
13915 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13916 vceq_f64 (float64x1_t __a, float64x1_t __b)
13918 return (uint64x1_t) (__a == __b);
13921 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13922 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
13924 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
13925 (int8x8_t) __b);
13928 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13929 vceq_s8 (int8x8_t __a, int8x8_t __b)
13931 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
13934 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13935 vceq_s16 (int16x4_t __a, int16x4_t __b)
13937 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
13940 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13941 vceq_s32 (int32x2_t __a, int32x2_t __b)
13943 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
13946 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13947 vceq_s64 (int64x1_t __a, int64x1_t __b)
13949 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
13952 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13953 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
13955 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
13956 (int8x8_t) __b);
13959 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13960 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
13962 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
13963 (int16x4_t) __b);
13966 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13967 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
13969 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
13970 (int32x2_t) __b);
13973 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13974 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
13976 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
13979 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13980 vceqq_f32 (float32x4_t __a, float32x4_t __b)
13982 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
13985 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13986 vceqq_f64 (float64x2_t __a, float64x2_t __b)
13988 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
13991 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13992 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
13994 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
13995 (int8x16_t) __b);
13998 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13999 vceqq_s8 (int8x16_t __a, int8x16_t __b)
14001 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
14004 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14005 vceqq_s16 (int16x8_t __a, int16x8_t __b)
14007 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
14010 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14011 vceqq_s32 (int32x4_t __a, int32x4_t __b)
14013 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
14016 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14017 vceqq_s64 (int64x2_t __a, int64x2_t __b)
14019 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
14022 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14023 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
14025 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14026 (int8x16_t) __b);
14029 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14030 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
14032 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
14033 (int16x8_t) __b);
14036 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14037 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
14039 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
14040 (int32x4_t) __b);
14043 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14044 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
14046 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
14047 (int64x2_t) __b);
14050 /* vceq - scalar. */
14052 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14053 vceqs_f32 (float32_t __a, float32_t __b)
14055 return __a == __b ? -1 : 0;
/* vceqd_s64: scalar 64-bit equality; all-ones when equal, else zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_s64 (int64_t __a, int64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}
/* vceqd_u64: scalar unsigned 64-bit equality; all-ones when equal.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_u64 (uint64_t __a, uint64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}
14070 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14071 vceqd_f64 (float64_t __a, float64_t __b)
14073 return __a == __b ? -1ll : 0ll;
14076 /* vceqz - vector. */
14078 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14079 vceqz_f32 (float32x2_t __a)
14081 float32x2_t __b = {0.0f, 0.0f};
14082 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
14085 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14086 vceqz_f64 (float64x1_t __a)
14088 return (uint64x1_t) (__a == (float64x1_t) {0.0});
14091 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14092 vceqz_p8 (poly8x8_t __a)
14094 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14095 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14096 (int8x8_t) __b);
14099 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14100 vceqz_s8 (int8x8_t __a)
14102 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14103 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
14106 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14107 vceqz_s16 (int16x4_t __a)
14109 int16x4_t __b = {0, 0, 0, 0};
14110 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
14113 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14114 vceqz_s32 (int32x2_t __a)
14116 int32x2_t __b = {0, 0};
14117 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
14120 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14121 vceqz_s64 (int64x1_t __a)
14123 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
14126 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14127 vceqz_u8 (uint8x8_t __a)
14129 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14130 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14131 (int8x8_t) __b);
14134 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14135 vceqz_u16 (uint16x4_t __a)
14137 uint16x4_t __b = {0, 0, 0, 0};
14138 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
14139 (int16x4_t) __b);
14142 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14143 vceqz_u32 (uint32x2_t __a)
14145 uint32x2_t __b = {0, 0};
14146 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
14147 (int32x2_t) __b);
14150 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14151 vceqz_u64 (uint64x1_t __a)
14153 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
14156 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14157 vceqzq_f32 (float32x4_t __a)
14159 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14160 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
14163 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14164 vceqzq_f64 (float64x2_t __a)
14166 float64x2_t __b = {0.0, 0.0};
14167 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
14170 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14171 vceqzq_p8 (poly8x16_t __a)
14173 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14174 0, 0, 0, 0, 0, 0, 0, 0};
14175 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14176 (int8x16_t) __b);
14179 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14180 vceqzq_s8 (int8x16_t __a)
14182 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14183 0, 0, 0, 0, 0, 0, 0, 0};
14184 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
14187 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14188 vceqzq_s16 (int16x8_t __a)
14190 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14191 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
14194 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14195 vceqzq_s32 (int32x4_t __a)
14197 int32x4_t __b = {0, 0, 0, 0};
14198 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
14201 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14202 vceqzq_s64 (int64x2_t __a)
14204 int64x2_t __b = {0, 0};
14205 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
14208 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14209 vceqzq_u8 (uint8x16_t __a)
14211 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14212 0, 0, 0, 0, 0, 0, 0, 0};
14213 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14214 (int8x16_t) __b);
14217 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14218 vceqzq_u16 (uint16x8_t __a)
14220 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14221 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
14222 (int16x8_t) __b);
14225 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14226 vceqzq_u32 (uint32x4_t __a)
14228 uint32x4_t __b = {0, 0, 0, 0};
14229 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
14230 (int32x4_t) __b);
14233 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14234 vceqzq_u64 (uint64x2_t __a)
14236 uint64x2_t __b = {0, 0};
14237 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
14238 (int64x2_t) __b);
14241 /* vceqz - scalar. */
14243 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14244 vceqzs_f32 (float32_t __a)
14246 return __a == 0.0f ? -1 : 0;
/* Scalar compare-equal-to-zero: all-ones mask when zero, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_s64 (int64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_u64 (uint64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}
14261 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14262 vceqzd_f64 (float64_t __a)
14264 return __a == 0.0 ? -1ll : 0ll;
14267 /* vcge - vector. */
14269 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14270 vcge_f32 (float32x2_t __a, float32x2_t __b)
14272 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14275 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14276 vcge_f64 (float64x1_t __a, float64x1_t __b)
14278 return (uint64x1_t) (__a >= __b);
14281 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14282 vcge_s8 (int8x8_t __a, int8x8_t __b)
14284 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14287 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14288 vcge_s16 (int16x4_t __a, int16x4_t __b)
14290 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
14293 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14294 vcge_s32 (int32x2_t __a, int32x2_t __b)
14296 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
14299 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14300 vcge_s64 (int64x1_t __a, int64x1_t __b)
14302 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
14305 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14306 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
14308 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
14309 (int8x8_t) __b);
14312 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14313 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
14315 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
14316 (int16x4_t) __b);
14319 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14320 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
14322 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
14323 (int32x2_t) __b);
14326 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14327 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
14329 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
14332 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14333 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
14335 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14338 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14339 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
14341 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14344 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14345 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
14347 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14350 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14351 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
14353 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
14356 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14357 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
14359 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
14362 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14363 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
14365 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
14368 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14369 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
14371 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
14372 (int8x16_t) __b);
14375 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14376 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
14378 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
14379 (int16x8_t) __b);
14382 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14383 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
14385 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
14386 (int32x4_t) __b);
14389 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14390 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
14392 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
14393 (int64x2_t) __b);
14396 /* vcge - scalar. */
14398 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14399 vcges_f32 (float32_t __a, float32_t __b)
14401 return __a >= __b ? -1 : 0;
/* Scalar >= compare: all-ones mask when true, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_s64 (int64_t __a, int64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_u64 (uint64_t __a, uint64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}
14416 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14417 vcged_f64 (float64_t __a, float64_t __b)
14419 return __a >= __b ? -1ll : 0ll;
14422 /* vcgez - vector. */
14424 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14425 vcgez_f32 (float32x2_t __a)
14427 float32x2_t __b = {0.0f, 0.0f};
14428 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14431 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14432 vcgez_f64 (float64x1_t __a)
14434 return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
14437 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14438 vcgez_s8 (int8x8_t __a)
14440 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14441 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14444 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14445 vcgez_s16 (int16x4_t __a)
14447 int16x4_t __b = {0, 0, 0, 0};
14448 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
14451 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14452 vcgez_s32 (int32x2_t __a)
14454 int32x2_t __b = {0, 0};
14455 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
14458 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14459 vcgez_s64 (int64x1_t __a)
14461 return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
14464 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14465 vcgezq_f32 (float32x4_t __a)
14467 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14468 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14471 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14472 vcgezq_f64 (float64x2_t __a)
14474 float64x2_t __b = {0.0, 0.0};
14475 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14478 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14479 vcgezq_s8 (int8x16_t __a)
14481 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14482 0, 0, 0, 0, 0, 0, 0, 0};
14483 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14486 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14487 vcgezq_s16 (int16x8_t __a)
14489 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14490 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
14493 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14494 vcgezq_s32 (int32x4_t __a)
14496 int32x4_t __b = {0, 0, 0, 0};
14497 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
14500 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14501 vcgezq_s64 (int64x2_t __a)
14503 int64x2_t __b = {0, 0};
14504 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
14507 /* vcgez - scalar. */
14509 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14510 vcgezs_f32 (float32_t __a)
14512 return __a >= 0.0f ? -1 : 0;
/* Scalar >= 0 compare: all-ones mask when non-negative, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgezd_s64 (int64_t __a)
{
  return __a >= 0 ? -1ll : 0ll;
}
14521 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14522 vcgezd_f64 (float64_t __a)
14524 return __a >= 0.0 ? -1ll : 0ll;
14527 /* vcgt - vector. */
14529 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14530 vcgt_f32 (float32x2_t __a, float32x2_t __b)
14532 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
14535 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14536 vcgt_f64 (float64x1_t __a, float64x1_t __b)
14538 return (uint64x1_t) (__a > __b);
14541 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14542 vcgt_s8 (int8x8_t __a, int8x8_t __b)
14544 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
14547 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14548 vcgt_s16 (int16x4_t __a, int16x4_t __b)
14550 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
14553 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14554 vcgt_s32 (int32x2_t __a, int32x2_t __b)
14556 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
14559 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14560 vcgt_s64 (int64x1_t __a, int64x1_t __b)
14562 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14565 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14566 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
14568 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
14569 (int8x8_t) __b);
14572 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14573 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
14575 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
14576 (int16x4_t) __b);
14579 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14580 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
14582 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
14583 (int32x2_t) __b);
14586 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14587 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
14589 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14592 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14593 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
14595 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
14598 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14599 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
14601 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
14604 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14605 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
14607 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
14610 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14611 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
14613 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
14616 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14617 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
14619 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
14622 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14623 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
14625 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
14628 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14629 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
14631 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
14632 (int8x16_t) __b);
14635 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14636 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
14638 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
14639 (int16x8_t) __b);
14642 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14643 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
14645 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
14646 (int32x4_t) __b);
14649 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14650 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
14652 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
14653 (int64x2_t) __b);
14656 /* vcgt - scalar. */
14658 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14659 vcgts_f32 (float32_t __a, float32_t __b)
14661 return __a > __b ? -1 : 0;
/* Scalar > compare: all-ones mask when true, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_s64 (int64_t __a, int64_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_u64 (uint64_t __a, uint64_t __b)
{
  return __a > __b ? -1ll : 0ll;
}
14676 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14677 vcgtd_f64 (float64_t __a, float64_t __b)
14679 return __a > __b ? -1ll : 0ll;
14682 /* vcgtz - vector. */
14684 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14685 vcgtz_f32 (float32x2_t __a)
14687 float32x2_t __b = {0.0f, 0.0f};
14688 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
14691 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14692 vcgtz_f64 (float64x1_t __a)
14694 return (uint64x1_t) (__a > (float64x1_t) {0.0});
14697 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14698 vcgtz_s8 (int8x8_t __a)
14700 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14701 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
14704 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14705 vcgtz_s16 (int16x4_t __a)
14707 int16x4_t __b = {0, 0, 0, 0};
14708 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
14711 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14712 vcgtz_s32 (int32x2_t __a)
14714 int32x2_t __b = {0, 0};
14715 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
14718 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14719 vcgtz_s64 (int64x1_t __a)
14721 return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
14724 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14725 vcgtzq_f32 (float32x4_t __a)
14727 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14728 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
14731 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14732 vcgtzq_f64 (float64x2_t __a)
14734 float64x2_t __b = {0.0, 0.0};
14735 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
14738 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14739 vcgtzq_s8 (int8x16_t __a)
14741 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14742 0, 0, 0, 0, 0, 0, 0, 0};
14743 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
14746 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14747 vcgtzq_s16 (int16x8_t __a)
14749 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14750 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
14753 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14754 vcgtzq_s32 (int32x4_t __a)
14756 int32x4_t __b = {0, 0, 0, 0};
14757 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
14760 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14761 vcgtzq_s64 (int64x2_t __a)
14763 int64x2_t __b = {0, 0};
14764 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
14767 /* vcgtz - scalar. */
14769 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14770 vcgtzs_f32 (float32_t __a)
14772 return __a > 0.0f ? -1 : 0;
/* Scalar > 0 compare: all-ones mask when strictly positive, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtzd_s64 (int64_t __a)
{
  return __a > 0 ? -1ll : 0ll;
}
14781 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14782 vcgtzd_f64 (float64_t __a)
14784 return __a > 0.0 ? -1ll : 0ll;
14787 /* vcle - vector. */
14789 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14790 vcle_f32 (float32x2_t __a, float32x2_t __b)
14792 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
14795 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14796 vcle_f64 (float64x1_t __a, float64x1_t __b)
14798 return (uint64x1_t) (__a <= __b);
14801 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14802 vcle_s8 (int8x8_t __a, int8x8_t __b)
14804 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
14807 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14808 vcle_s16 (int16x4_t __a, int16x4_t __b)
14810 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
14813 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14814 vcle_s32 (int32x2_t __a, int32x2_t __b)
14816 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
14819 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14820 vcle_s64 (int64x1_t __a, int64x1_t __b)
14822 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14825 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14826 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
14828 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
14829 (int8x8_t) __a);
14832 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14833 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
14835 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
14836 (int16x4_t) __a);
14839 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14840 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
14842 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
14843 (int32x2_t) __a);
14846 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14847 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
14849 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14852 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14853 vcleq_f32 (float32x4_t __a, float32x4_t __b)
14855 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
14858 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14859 vcleq_f64 (float64x2_t __a, float64x2_t __b)
14861 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
14864 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14865 vcleq_s8 (int8x16_t __a, int8x16_t __b)
14867 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
14870 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14871 vcleq_s16 (int16x8_t __a, int16x8_t __b)
14873 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
14876 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14877 vcleq_s32 (int32x4_t __a, int32x4_t __b)
14879 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
14882 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14883 vcleq_s64 (int64x2_t __a, int64x2_t __b)
14885 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
14888 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14889 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
14891 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
14892 (int8x16_t) __a);
14895 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14896 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
14898 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
14899 (int16x8_t) __a);
14902 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14903 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
14905 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
14906 (int32x4_t) __a);
14909 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14910 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
14912 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
14913 (int64x2_t) __a);
14916 /* vcle - scalar. */
14918 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14919 vcles_f32 (float32_t __a, float32_t __b)
14921 return __a <= __b ? -1 : 0;
/* Scalar <= compare: all-ones mask when true, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_s64 (int64_t __a, int64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_u64 (uint64_t __a, uint64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}
14936 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14937 vcled_f64 (float64_t __a, float64_t __b)
14939 return __a <= __b ? -1ll : 0ll;
14942 /* vclez - vector. */
14944 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14945 vclez_f32 (float32x2_t __a)
14947 float32x2_t __b = {0.0f, 0.0f};
14948 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
14951 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14952 vclez_f64 (float64x1_t __a)
14954 return (uint64x1_t) (__a <= (float64x1_t) {0.0});
14957 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14958 vclez_s8 (int8x8_t __a)
14960 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14961 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
14964 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14965 vclez_s16 (int16x4_t __a)
14967 int16x4_t __b = {0, 0, 0, 0};
14968 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
14971 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14972 vclez_s32 (int32x2_t __a)
14974 int32x2_t __b = {0, 0};
14975 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
14978 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14979 vclez_s64 (int64x1_t __a)
14981 return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
14984 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14985 vclezq_f32 (float32x4_t __a)
14987 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14988 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
14991 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14992 vclezq_f64 (float64x2_t __a)
14994 float64x2_t __b = {0.0, 0.0};
14995 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
14998 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14999 vclezq_s8 (int8x16_t __a)
15001 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15002 0, 0, 0, 0, 0, 0, 0, 0};
15003 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
15006 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15007 vclezq_s16 (int16x8_t __a)
15009 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15010 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
15013 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15014 vclezq_s32 (int32x4_t __a)
15016 int32x4_t __b = {0, 0, 0, 0};
15017 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
15020 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15021 vclezq_s64 (int64x2_t __a)
15023 int64x2_t __b = {0, 0};
15024 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
15027 /* vclez - scalar. */
15029 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15030 vclezs_f32 (float32_t __a)
15032 return __a <= 0.0f ? -1 : 0;
/* Scalar <= 0 compare: all-ones mask when non-positive, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_s64 (int64_t __a)
{
  return __a <= 0 ? -1ll : 0ll;
}
15041 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15042 vclezd_f64 (float64_t __a)
15044 return __a <= 0.0 ? -1ll : 0ll;
15047 /* vclt - vector. */
15049 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15050 vclt_f32 (float32x2_t __a, float32x2_t __b)
15052 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
15055 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15056 vclt_f64 (float64x1_t __a, float64x1_t __b)
15058 return (uint64x1_t) (__a < __b);
15061 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15062 vclt_s8 (int8x8_t __a, int8x8_t __b)
15064 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
15067 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15068 vclt_s16 (int16x4_t __a, int16x4_t __b)
15070 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
15073 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15074 vclt_s32 (int32x2_t __a, int32x2_t __b)
15076 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
15079 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15080 vclt_s64 (int64x1_t __a, int64x1_t __b)
15082 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
15085 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15086 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
15088 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
15089 (int8x8_t) __a);
15092 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15093 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
15095 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
15096 (int16x4_t) __a);
15099 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15100 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
15102 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
15103 (int32x2_t) __a);
15106 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15107 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
15109 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
15112 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15113 vcltq_f32 (float32x4_t __a, float32x4_t __b)
15115 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
15118 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15119 vcltq_f64 (float64x2_t __a, float64x2_t __b)
15121 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
15124 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15125 vcltq_s8 (int8x16_t __a, int8x16_t __b)
15127 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
15130 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15131 vcltq_s16 (int16x8_t __a, int16x8_t __b)
15133 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
15136 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15137 vcltq_s32 (int32x4_t __a, int32x4_t __b)
15139 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
15142 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15143 vcltq_s64 (int64x2_t __a, int64x2_t __b)
15145 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
15148 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15149 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
15151 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
15152 (int8x16_t) __a);
15155 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15156 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
15158 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
15159 (int16x8_t) __a);
15162 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15163 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
15165 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
15166 (int32x4_t) __a);
15169 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15170 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
15172 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
15173 (int64x2_t) __a);
15176 /* vclt - scalar. */
15178 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15179 vclts_f32 (float32_t __a, float32_t __b)
15181 return __a < __b ? -1 : 0;
/* Scalar < compare: all-ones mask when true, else 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_s64 (int64_t __a, int64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_u64 (uint64_t __a, uint64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}
15196 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15197 vcltd_f64 (float64_t __a, float64_t __b)
15199 return __a < __b ? -1ll : 0ll;
15202 /* vcltz - vector. */
15204 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15205 vcltz_f32 (float32x2_t __a)
15207 float32x2_t __b = {0.0f, 0.0f};
15208 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
15211 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15212 vcltz_f64 (float64x1_t __a)
15214 return (uint64x1_t) (__a < (float64x1_t) {0.0});
15217 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15218 vcltz_s8 (int8x8_t __a)
15220 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15221 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
15224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15225 vcltz_s16 (int16x4_t __a)
15227 int16x4_t __b = {0, 0, 0, 0};
15228 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
15231 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15232 vcltz_s32 (int32x2_t __a)
15234 int32x2_t __b = {0, 0};
15235 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
15238 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15239 vcltz_s64 (int64x1_t __a)
15241 return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
15244 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15245 vcltzq_f32 (float32x4_t __a)
15247 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15248 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
15251 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15252 vcltzq_f64 (float64x2_t __a)
15254 float64x2_t __b = {0.0, 0.0};
15255 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
15258 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15259 vcltzq_s8 (int8x16_t __a)
15261 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15262 0, 0, 0, 0, 0, 0, 0, 0};
15263 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
15266 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15267 vcltzq_s16 (int16x8_t __a)
15269 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15270 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
15273 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15274 vcltzq_s32 (int32x4_t __a)
15276 int32x4_t __b = {0, 0, 0, 0};
15277 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
15280 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15281 vcltzq_s64 (int64x2_t __a)
15283 int64x2_t __b = {0, 0};
15284 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
15287 /* vcltz - scalar. */
15289 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15290 vcltzs_f32 (float32_t __a)
15292 return __a < 0.0f ? -1 : 0;
15295 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15296 vcltzd_s64 (int64_t __a)
15298 return __a < 0 ? -1ll : 0ll;
15301 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15302 vcltzd_f64 (float64_t __a)
15304 return __a < 0.0 ? -1ll : 0ll;
15307 /* vclz. */
15309 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15310 vclz_s8 (int8x8_t __a)
15312 return __builtin_aarch64_clzv8qi (__a);
15315 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15316 vclz_s16 (int16x4_t __a)
15318 return __builtin_aarch64_clzv4hi (__a);
15321 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15322 vclz_s32 (int32x2_t __a)
15324 return __builtin_aarch64_clzv2si (__a);
15327 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15328 vclz_u8 (uint8x8_t __a)
15330 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
15333 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15334 vclz_u16 (uint16x4_t __a)
15336 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
15339 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15340 vclz_u32 (uint32x2_t __a)
15342 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
15345 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15346 vclzq_s8 (int8x16_t __a)
15348 return __builtin_aarch64_clzv16qi (__a);
15351 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15352 vclzq_s16 (int16x8_t __a)
15354 return __builtin_aarch64_clzv8hi (__a);
15357 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15358 vclzq_s32 (int32x4_t __a)
15360 return __builtin_aarch64_clzv4si (__a);
15363 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15364 vclzq_u8 (uint8x16_t __a)
15366 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
15369 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15370 vclzq_u16 (uint16x8_t __a)
15372 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
15375 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15376 vclzq_u32 (uint32x4_t __a)
15378 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
15381 /* vcvt (double -> float). */
15383 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15384 vcvt_f32_f64 (float64x2_t __a)
15386 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
15389 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15390 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
15392 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
15395 /* vcvt (float -> double). */
15397 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15398 vcvt_f64_f32 (float32x2_t __a)
15401 return __builtin_aarch64_float_extend_lo_v2df (__a);
15404 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15405 vcvt_high_f64_f32 (float32x4_t __a)
15407 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
15410 /* vcvt (<u>int -> float) */
15412 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15413 vcvtd_f64_s64 (int64_t __a)
15415 return (float64_t) __a;
15418 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15419 vcvtd_f64_u64 (uint64_t __a)
15421 return (float64_t) __a;
15424 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15425 vcvts_f32_s32 (int32_t __a)
15427 return (float32_t) __a;
15430 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15431 vcvts_f32_u32 (uint32_t __a)
15433 return (float32_t) __a;
15436 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15437 vcvt_f32_s32 (int32x2_t __a)
15439 return __builtin_aarch64_floatv2siv2sf (__a);
15442 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15443 vcvt_f32_u32 (uint32x2_t __a)
15445 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
15448 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15449 vcvtq_f32_s32 (int32x4_t __a)
15451 return __builtin_aarch64_floatv4siv4sf (__a);
15454 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15455 vcvtq_f32_u32 (uint32x4_t __a)
15457 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
15460 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15461 vcvtq_f64_s64 (int64x2_t __a)
15463 return __builtin_aarch64_floatv2div2df (__a);
15466 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15467 vcvtq_f64_u64 (uint64x2_t __a)
15469 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
15472 /* vcvt (float -> <u>int) */
15474 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15475 vcvtd_s64_f64 (float64_t __a)
15477 return (int64_t) __a;
15480 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15481 vcvtd_u64_f64 (float64_t __a)
15483 return (uint64_t) __a;
15486 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15487 vcvts_s32_f32 (float32_t __a)
15489 return (int32_t) __a;
15492 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15493 vcvts_u32_f32 (float32_t __a)
15495 return (uint32_t) __a;
15498 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15499 vcvt_s32_f32 (float32x2_t __a)
15501 return __builtin_aarch64_lbtruncv2sfv2si (__a);
15504 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15505 vcvt_u32_f32 (float32x2_t __a)
15507 /* TODO: This cast should go away when builtins have
15508 their correct types. */
15509 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
15512 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15513 vcvtq_s32_f32 (float32x4_t __a)
15515 return __builtin_aarch64_lbtruncv4sfv4si (__a);
15518 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15519 vcvtq_u32_f32 (float32x4_t __a)
15521 /* TODO: This cast should go away when builtins have
15522 their correct types. */
15523 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
15526 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15527 vcvtq_s64_f64 (float64x2_t __a)
15529 return __builtin_aarch64_lbtruncv2dfv2di (__a);
15532 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15533 vcvtq_u64_f64 (float64x2_t __a)
15535 /* TODO: This cast should go away when builtins have
15536 their correct types. */
15537 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
15540 /* vcvta */
15542 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15543 vcvtad_s64_f64 (float64_t __a)
15545 return __builtin_aarch64_lrounddfdi (__a);
15548 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15549 vcvtad_u64_f64 (float64_t __a)
15551 return __builtin_aarch64_lroundudfdi (__a);
15554 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15555 vcvtas_s32_f32 (float32_t __a)
15557 return __builtin_aarch64_lroundsfsi (__a);
15560 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15561 vcvtas_u32_f32 (float32_t __a)
15563 return __builtin_aarch64_lroundusfsi (__a);
15566 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15567 vcvta_s32_f32 (float32x2_t __a)
15569 return __builtin_aarch64_lroundv2sfv2si (__a);
15572 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15573 vcvta_u32_f32 (float32x2_t __a)
15575 /* TODO: This cast should go away when builtins have
15576 their correct types. */
15577 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
15580 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15581 vcvtaq_s32_f32 (float32x4_t __a)
15583 return __builtin_aarch64_lroundv4sfv4si (__a);
15586 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15587 vcvtaq_u32_f32 (float32x4_t __a)
15589 /* TODO: This cast should go away when builtins have
15590 their correct types. */
15591 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
15594 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15595 vcvtaq_s64_f64 (float64x2_t __a)
15597 return __builtin_aarch64_lroundv2dfv2di (__a);
15600 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15601 vcvtaq_u64_f64 (float64x2_t __a)
15603 /* TODO: This cast should go away when builtins have
15604 their correct types. */
15605 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
15608 /* vcvtm */
15610 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15611 vcvtmd_s64_f64 (float64_t __a)
15613 return __builtin_llfloor (__a);
15616 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15617 vcvtmd_u64_f64 (float64_t __a)
15619 return __builtin_aarch64_lfloorudfdi (__a);
15622 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15623 vcvtms_s32_f32 (float32_t __a)
15625 return __builtin_ifloorf (__a);
15628 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15629 vcvtms_u32_f32 (float32_t __a)
15631 return __builtin_aarch64_lfloorusfsi (__a);
15634 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15635 vcvtm_s32_f32 (float32x2_t __a)
15637 return __builtin_aarch64_lfloorv2sfv2si (__a);
15640 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15641 vcvtm_u32_f32 (float32x2_t __a)
15643 /* TODO: This cast should go away when builtins have
15644 their correct types. */
15645 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
15648 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15649 vcvtmq_s32_f32 (float32x4_t __a)
15651 return __builtin_aarch64_lfloorv4sfv4si (__a);
15654 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15655 vcvtmq_u32_f32 (float32x4_t __a)
15657 /* TODO: This cast should go away when builtins have
15658 their correct types. */
15659 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
15662 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15663 vcvtmq_s64_f64 (float64x2_t __a)
15665 return __builtin_aarch64_lfloorv2dfv2di (__a);
15668 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15669 vcvtmq_u64_f64 (float64x2_t __a)
15671 /* TODO: This cast should go away when builtins have
15672 their correct types. */
15673 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
15676 /* vcvtn */
15678 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15679 vcvtnd_s64_f64 (float64_t __a)
15681 return __builtin_aarch64_lfrintndfdi (__a);
15684 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15685 vcvtnd_u64_f64 (float64_t __a)
15687 return __builtin_aarch64_lfrintnudfdi (__a);
15690 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15691 vcvtns_s32_f32 (float32_t __a)
15693 return __builtin_aarch64_lfrintnsfsi (__a);
15696 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15697 vcvtns_u32_f32 (float32_t __a)
15699 return __builtin_aarch64_lfrintnusfsi (__a);
15702 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15703 vcvtn_s32_f32 (float32x2_t __a)
15705 return __builtin_aarch64_lfrintnv2sfv2si (__a);
15708 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15709 vcvtn_u32_f32 (float32x2_t __a)
15711 /* TODO: This cast should go away when builtins have
15712 their correct types. */
15713 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
15716 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15717 vcvtnq_s32_f32 (float32x4_t __a)
15719 return __builtin_aarch64_lfrintnv4sfv4si (__a);
15722 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15723 vcvtnq_u32_f32 (float32x4_t __a)
15725 /* TODO: This cast should go away when builtins have
15726 their correct types. */
15727 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
15730 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15731 vcvtnq_s64_f64 (float64x2_t __a)
15733 return __builtin_aarch64_lfrintnv2dfv2di (__a);
15736 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15737 vcvtnq_u64_f64 (float64x2_t __a)
15739 /* TODO: This cast should go away when builtins have
15740 their correct types. */
15741 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
15744 /* vcvtp */
15746 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15747 vcvtpd_s64_f64 (float64_t __a)
15749 return __builtin_llceil (__a);
15752 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15753 vcvtpd_u64_f64 (float64_t __a)
15755 return __builtin_aarch64_lceiludfdi (__a);
15758 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15759 vcvtps_s32_f32 (float32_t __a)
15761 return __builtin_iceilf (__a);
15764 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15765 vcvtps_u32_f32 (float32_t __a)
15767 return __builtin_aarch64_lceilusfsi (__a);
15770 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15771 vcvtp_s32_f32 (float32x2_t __a)
15773 return __builtin_aarch64_lceilv2sfv2si (__a);
15776 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15777 vcvtp_u32_f32 (float32x2_t __a)
15779 /* TODO: This cast should go away when builtins have
15780 their correct types. */
15781 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
15784 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15785 vcvtpq_s32_f32 (float32x4_t __a)
15787 return __builtin_aarch64_lceilv4sfv4si (__a);
15790 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15791 vcvtpq_u32_f32 (float32x4_t __a)
15793 /* TODO: This cast should go away when builtins have
15794 their correct types. */
15795 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
15798 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15799 vcvtpq_s64_f64 (float64x2_t __a)
15801 return __builtin_aarch64_lceilv2dfv2di (__a);
15804 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15805 vcvtpq_u64_f64 (float64x2_t __a)
15807 /* TODO: This cast should go away when builtins have
15808 their correct types. */
15809 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
15812 /* vdup_n */
15814 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15815 vdup_n_f32 (float32_t __a)
15817 return (float32x2_t) {__a, __a};
15820 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15821 vdup_n_f64 (float64_t __a)
15823 return (float64x1_t) {__a};
15826 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15827 vdup_n_p8 (poly8_t __a)
15829 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15832 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15833 vdup_n_p16 (poly16_t __a)
15835 return (poly16x4_t) {__a, __a, __a, __a};
15838 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15839 vdup_n_s8 (int8_t __a)
15841 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15844 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15845 vdup_n_s16 (int16_t __a)
15847 return (int16x4_t) {__a, __a, __a, __a};
15850 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15851 vdup_n_s32 (int32_t __a)
15853 return (int32x2_t) {__a, __a};
15856 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15857 vdup_n_s64 (int64_t __a)
15859 return (int64x1_t) {__a};
15862 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15863 vdup_n_u8 (uint8_t __a)
15865 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15868 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15869 vdup_n_u16 (uint16_t __a)
15871 return (uint16x4_t) {__a, __a, __a, __a};
15874 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15875 vdup_n_u32 (uint32_t __a)
15877 return (uint32x2_t) {__a, __a};
15880 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15881 vdup_n_u64 (uint64_t __a)
15883 return (uint64x1_t) {__a};
15886 /* vdupq_n */
15888 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15889 vdupq_n_f32 (float32_t __a)
15891 return (float32x4_t) {__a, __a, __a, __a};
15894 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15895 vdupq_n_f64 (float64_t __a)
15897 return (float64x2_t) {__a, __a};
15900 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15901 vdupq_n_p8 (uint32_t __a)
15903 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
15904 __a, __a, __a, __a, __a, __a, __a, __a};
15907 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15908 vdupq_n_p16 (uint32_t __a)
15910 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15913 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15914 vdupq_n_s8 (int32_t __a)
15916 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
15917 __a, __a, __a, __a, __a, __a, __a, __a};
15920 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15921 vdupq_n_s16 (int32_t __a)
15923 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15926 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15927 vdupq_n_s32 (int32_t __a)
15929 return (int32x4_t) {__a, __a, __a, __a};
15932 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15933 vdupq_n_s64 (int64_t __a)
15935 return (int64x2_t) {__a, __a};
15938 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15939 vdupq_n_u8 (uint32_t __a)
15941 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
15942 __a, __a, __a, __a, __a, __a, __a, __a};
15945 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15946 vdupq_n_u16 (uint32_t __a)
15948 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15951 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15952 vdupq_n_u32 (uint32_t __a)
15954 return (uint32x4_t) {__a, __a, __a, __a};
15957 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15958 vdupq_n_u64 (uint64_t __a)
15960 return (uint64x2_t) {__a, __a};
15963 /* vdup_lane */
15965 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15966 vdup_lane_f32 (float32x2_t __a, const int __b)
15968 return __aarch64_vdup_lane_f32 (__a, __b);
15971 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15972 vdup_lane_f64 (float64x1_t __a, const int __b)
15974 return __aarch64_vdup_lane_f64 (__a, __b);
15977 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15978 vdup_lane_p8 (poly8x8_t __a, const int __b)
15980 return __aarch64_vdup_lane_p8 (__a, __b);
15983 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15984 vdup_lane_p16 (poly16x4_t __a, const int __b)
15986 return __aarch64_vdup_lane_p16 (__a, __b);
15989 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15990 vdup_lane_s8 (int8x8_t __a, const int __b)
15992 return __aarch64_vdup_lane_s8 (__a, __b);
15995 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15996 vdup_lane_s16 (int16x4_t __a, const int __b)
15998 return __aarch64_vdup_lane_s16 (__a, __b);
16001 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16002 vdup_lane_s32 (int32x2_t __a, const int __b)
16004 return __aarch64_vdup_lane_s32 (__a, __b);
16007 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16008 vdup_lane_s64 (int64x1_t __a, const int __b)
16010 return __aarch64_vdup_lane_s64 (__a, __b);
16013 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16014 vdup_lane_u8 (uint8x8_t __a, const int __b)
16016 return __aarch64_vdup_lane_u8 (__a, __b);
16019 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16020 vdup_lane_u16 (uint16x4_t __a, const int __b)
16022 return __aarch64_vdup_lane_u16 (__a, __b);
16025 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16026 vdup_lane_u32 (uint32x2_t __a, const int __b)
16028 return __aarch64_vdup_lane_u32 (__a, __b);
16031 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16032 vdup_lane_u64 (uint64x1_t __a, const int __b)
16034 return __aarch64_vdup_lane_u64 (__a, __b);
16037 /* vdup_laneq */
16039 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16040 vdup_laneq_f32 (float32x4_t __a, const int __b)
16042 return __aarch64_vdup_laneq_f32 (__a, __b);
16045 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16046 vdup_laneq_f64 (float64x2_t __a, const int __b)
16048 return __aarch64_vdup_laneq_f64 (__a, __b);
16051 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16052 vdup_laneq_p8 (poly8x16_t __a, const int __b)
16054 return __aarch64_vdup_laneq_p8 (__a, __b);
16057 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16058 vdup_laneq_p16 (poly16x8_t __a, const int __b)
16060 return __aarch64_vdup_laneq_p16 (__a, __b);
16063 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16064 vdup_laneq_s8 (int8x16_t __a, const int __b)
16066 return __aarch64_vdup_laneq_s8 (__a, __b);
16069 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16070 vdup_laneq_s16 (int16x8_t __a, const int __b)
16072 return __aarch64_vdup_laneq_s16 (__a, __b);
16075 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16076 vdup_laneq_s32 (int32x4_t __a, const int __b)
16078 return __aarch64_vdup_laneq_s32 (__a, __b);
16081 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16082 vdup_laneq_s64 (int64x2_t __a, const int __b)
16084 return __aarch64_vdup_laneq_s64 (__a, __b);
16087 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16088 vdup_laneq_u8 (uint8x16_t __a, const int __b)
16090 return __aarch64_vdup_laneq_u8 (__a, __b);
16093 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16094 vdup_laneq_u16 (uint16x8_t __a, const int __b)
16096 return __aarch64_vdup_laneq_u16 (__a, __b);
16099 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16100 vdup_laneq_u32 (uint32x4_t __a, const int __b)
16102 return __aarch64_vdup_laneq_u32 (__a, __b);
16105 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16106 vdup_laneq_u64 (uint64x2_t __a, const int __b)
16108 return __aarch64_vdup_laneq_u64 (__a, __b);
16111 /* vdupq_lane */
16112 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16113 vdupq_lane_f32 (float32x2_t __a, const int __b)
16115 return __aarch64_vdupq_lane_f32 (__a, __b);
16118 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16119 vdupq_lane_f64 (float64x1_t __a, const int __b)
16121 return __aarch64_vdupq_lane_f64 (__a, __b);
16124 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16125 vdupq_lane_p8 (poly8x8_t __a, const int __b)
16127 return __aarch64_vdupq_lane_p8 (__a, __b);
16130 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16131 vdupq_lane_p16 (poly16x4_t __a, const int __b)
16133 return __aarch64_vdupq_lane_p16 (__a, __b);
16136 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16137 vdupq_lane_s8 (int8x8_t __a, const int __b)
16139 return __aarch64_vdupq_lane_s8 (__a, __b);
16142 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16143 vdupq_lane_s16 (int16x4_t __a, const int __b)
16145 return __aarch64_vdupq_lane_s16 (__a, __b);
16148 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16149 vdupq_lane_s32 (int32x2_t __a, const int __b)
16151 return __aarch64_vdupq_lane_s32 (__a, __b);
16154 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16155 vdupq_lane_s64 (int64x1_t __a, const int __b)
16157 return __aarch64_vdupq_lane_s64 (__a, __b);
16160 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16161 vdupq_lane_u8 (uint8x8_t __a, const int __b)
16163 return __aarch64_vdupq_lane_u8 (__a, __b);
16166 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16167 vdupq_lane_u16 (uint16x4_t __a, const int __b)
16169 return __aarch64_vdupq_lane_u16 (__a, __b);
16172 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16173 vdupq_lane_u32 (uint32x2_t __a, const int __b)
16175 return __aarch64_vdupq_lane_u32 (__a, __b);
16178 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16179 vdupq_lane_u64 (uint64x1_t __a, const int __b)
16181 return __aarch64_vdupq_lane_u64 (__a, __b);
16184 /* vdupq_laneq */
16185 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16186 vdupq_laneq_f32 (float32x4_t __a, const int __b)
16188 return __aarch64_vdupq_laneq_f32 (__a, __b);
16191 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16192 vdupq_laneq_f64 (float64x2_t __a, const int __b)
16194 return __aarch64_vdupq_laneq_f64 (__a, __b);
16197 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16198 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
16200 return __aarch64_vdupq_laneq_p8 (__a, __b);
16203 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16204 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
16206 return __aarch64_vdupq_laneq_p16 (__a, __b);
16209 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16210 vdupq_laneq_s8 (int8x16_t __a, const int __b)
16212 return __aarch64_vdupq_laneq_s8 (__a, __b);
16215 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16216 vdupq_laneq_s16 (int16x8_t __a, const int __b)
16218 return __aarch64_vdupq_laneq_s16 (__a, __b);
16221 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16222 vdupq_laneq_s32 (int32x4_t __a, const int __b)
16224 return __aarch64_vdupq_laneq_s32 (__a, __b);
16227 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16228 vdupq_laneq_s64 (int64x2_t __a, const int __b)
16230 return __aarch64_vdupq_laneq_s64 (__a, __b);
16233 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16234 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
16236 return __aarch64_vdupq_laneq_u8 (__a, __b);
16239 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16240 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
16242 return __aarch64_vdupq_laneq_u16 (__a, __b);
16245 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16246 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
16248 return __aarch64_vdupq_laneq_u32 (__a, __b);
16251 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16252 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
16254 return __aarch64_vdupq_laneq_u64 (__a, __b);
16257 /* vdupb_lane */
16258 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
16259 vdupb_lane_p8 (poly8x8_t __a, const int __b)
16261 return __aarch64_vget_lane_p8 (__a, __b);
16264 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
16265 vdupb_lane_s8 (int8x8_t __a, const int __b)
16267 return __aarch64_vget_lane_s8 (__a, __b);
16270 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16271 vdupb_lane_u8 (uint8x8_t __a, const int __b)
16273 return __aarch64_vget_lane_u8 (__a, __b);
16276 /* vduph_lane */
16277 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
16278 vduph_lane_p16 (poly16x4_t __a, const int __b)
16280 return __aarch64_vget_lane_p16 (__a, __b);
16283 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
16284 vduph_lane_s16 (int16x4_t __a, const int __b)
16286 return __aarch64_vget_lane_s16 (__a, __b);
16289 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16290 vduph_lane_u16 (uint16x4_t __a, const int __b)
16292 return __aarch64_vget_lane_u16 (__a, __b);
16295 /* vdups_lane */
16296 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16297 vdups_lane_f32 (float32x2_t __a, const int __b)
16299 return __aarch64_vget_lane_f32 (__a, __b);
16302 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
16303 vdups_lane_s32 (int32x2_t __a, const int __b)
16305 return __aarch64_vget_lane_s32 (__a, __b);
16308 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16309 vdups_lane_u32 (uint32x2_t __a, const int __b)
16311 return __aarch64_vget_lane_u32 (__a, __b);
16314 /* vdupd_lane */
16315 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16316 vdupd_lane_f64 (float64x1_t __a, const int __b)
16318 __builtin_aarch64_im_lane_boundsi (__b, 1);
16319 return __a[0];
16322 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16323 vdupd_lane_s64 (int64x1_t __a, const int __b)
16325 __builtin_aarch64_im_lane_boundsi (__b, 1);
16326 return __a[0];
16329 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16330 vdupd_lane_u64 (uint64x1_t __a, const int __b)
16332 __builtin_aarch64_im_lane_boundsi (__b, 1);
16333 return __a[0];
16336 /* vdupb_laneq */
16337 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
16338 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
16340 return __aarch64_vgetq_lane_p8 (__a, __b);
16343 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
16344 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
16346 return __aarch64_vgetq_lane_s8 (__a, __b);
16349 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16350 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
16352 return __aarch64_vgetq_lane_u8 (__a, __b);
16355 /* vduph_laneq */
16356 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
16357 vduph_laneq_p16 (poly16x8_t __a, const int __b)
16359 return __aarch64_vgetq_lane_p16 (__a, __b);
16362 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
16363 vduph_laneq_s16 (int16x8_t __a, const int __b)
16365 return __aarch64_vgetq_lane_s16 (__a, __b);
16368 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16369 vduph_laneq_u16 (uint16x8_t __a, const int __b)
16371 return __aarch64_vgetq_lane_u16 (__a, __b);
16374 /* vdups_laneq */
16375 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16376 vdups_laneq_f32 (float32x4_t __a, const int __b)
16378 return __aarch64_vgetq_lane_f32 (__a, __b);
16381 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
16382 vdups_laneq_s32 (int32x4_t __a, const int __b)
16384 return __aarch64_vgetq_lane_s32 (__a, __b);
16387 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16388 vdups_laneq_u32 (uint32x4_t __a, const int __b)
16390 return __aarch64_vgetq_lane_u32 (__a, __b);
16393 /* vdupd_laneq */
16394 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16395 vdupd_laneq_f64 (float64x2_t __a, const int __b)
16397 return __aarch64_vgetq_lane_f64 (__a, __b);
16400 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16401 vdupd_laneq_s64 (int64x2_t __a, const int __b)
16403 return __aarch64_vgetq_lane_s64 (__a, __b);
16406 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16407 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
16409 return __aarch64_vgetq_lane_u64 (__a, __b);
16412 /* vext */
16414 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16415 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
16417 __builtin_aarch64_im_lane_boundsi (__c, 2);
16418 #ifdef __AARCH64EB__
16419 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16420 #else
16421 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16422 #endif
16425 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16426 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
16428 /* The only possible index to the assembler instruction returns element 0. */
16429 __builtin_aarch64_im_lane_boundsi (__c, 1);
16430 return __a;
16432 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16433 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
16435 __builtin_aarch64_im_lane_boundsi (__c, 8);
16436 #ifdef __AARCH64EB__
16437 return __builtin_shuffle (__b, __a, (uint8x8_t)
16438 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16439 #else
16440 return __builtin_shuffle (__a, __b,
16441 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16442 #endif
16445 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16446 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
16448 __builtin_aarch64_im_lane_boundsi (__c, 4);
16449 #ifdef __AARCH64EB__
16450 return __builtin_shuffle (__b, __a,
16451 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16452 #else
16453 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16454 #endif
16457 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16458 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
16460 __builtin_aarch64_im_lane_boundsi (__c, 8);
16461 #ifdef __AARCH64EB__
16462 return __builtin_shuffle (__b, __a, (uint8x8_t)
16463 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16464 #else
16465 return __builtin_shuffle (__a, __b,
16466 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16467 #endif
16470 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16471 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
16473 __builtin_aarch64_im_lane_boundsi (__c, 4);
16474 #ifdef __AARCH64EB__
16475 return __builtin_shuffle (__b, __a,
16476 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16477 #else
16478 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16479 #endif
16482 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16483 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
16485 __builtin_aarch64_im_lane_boundsi (__c, 2);
16486 #ifdef __AARCH64EB__
16487 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16488 #else
16489 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16490 #endif
16493 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16494 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
16496 /* The only possible index to the assembler instruction returns element 0. */
16497 __builtin_aarch64_im_lane_boundsi (__c, 1);
16498 return __a;
16501 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16502 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
16504 __builtin_aarch64_im_lane_boundsi (__c, 8);
16505 #ifdef __AARCH64EB__
16506 return __builtin_shuffle (__b, __a, (uint8x8_t)
16507 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16508 #else
16509 return __builtin_shuffle (__a, __b,
16510 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16511 #endif
16514 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16515 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
16517 __builtin_aarch64_im_lane_boundsi (__c, 4);
16518 #ifdef __AARCH64EB__
16519 return __builtin_shuffle (__b, __a,
16520 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16521 #else
16522 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16523 #endif
16526 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16527 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
16529 __builtin_aarch64_im_lane_boundsi (__c, 2);
16530 #ifdef __AARCH64EB__
16531 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16532 #else
16533 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16534 #endif
16537 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16538 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
16540 /* The only possible index to the assembler instruction returns element 0. */
16541 __builtin_aarch64_im_lane_boundsi (__c, 1);
16542 return __a;
16545 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16546 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
16548 __builtin_aarch64_im_lane_boundsi (__c, 4);
16549 #ifdef __AARCH64EB__
16550 return __builtin_shuffle (__b, __a,
16551 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16552 #else
16553 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16554 #endif
16557 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16558 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
16560 __builtin_aarch64_im_lane_boundsi (__c, 2);
16561 #ifdef __AARCH64EB__
16562 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16563 #else
16564 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16565 #endif
16568 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16569 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
16571 __builtin_aarch64_im_lane_boundsi (__c, 16);
16572 #ifdef __AARCH64EB__
16573 return __builtin_shuffle (__b, __a, (uint8x16_t)
16574 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16575 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16576 #else
16577 return __builtin_shuffle (__a, __b, (uint8x16_t)
16578 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16579 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16580 #endif
16583 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16584 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
16586 __builtin_aarch64_im_lane_boundsi (__c, 8);
16587 #ifdef __AARCH64EB__
16588 return __builtin_shuffle (__b, __a, (uint16x8_t)
16589 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16590 #else
16591 return __builtin_shuffle (__a, __b,
16592 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16593 #endif
16596 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16597 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
16599 __builtin_aarch64_im_lane_boundsi (__c, 16);
16600 #ifdef __AARCH64EB__
16601 return __builtin_shuffle (__b, __a, (uint8x16_t)
16602 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16603 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16604 #else
16605 return __builtin_shuffle (__a, __b, (uint8x16_t)
16606 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16607 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16608 #endif
16611 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16612 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
16614 __builtin_aarch64_im_lane_boundsi (__c, 8);
16615 #ifdef __AARCH64EB__
16616 return __builtin_shuffle (__b, __a, (uint16x8_t)
16617 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16618 #else
16619 return __builtin_shuffle (__a, __b,
16620 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16621 #endif
16624 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16625 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
16627 __builtin_aarch64_im_lane_boundsi (__c, 4);
16628 #ifdef __AARCH64EB__
16629 return __builtin_shuffle (__b, __a,
16630 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16631 #else
16632 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16633 #endif
16636 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16637 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
16639 __builtin_aarch64_im_lane_boundsi (__c, 2);
16640 #ifdef __AARCH64EB__
16641 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16642 #else
16643 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16644 #endif
16647 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16648 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
16650 __builtin_aarch64_im_lane_boundsi (__c, 16);
16651 #ifdef __AARCH64EB__
16652 return __builtin_shuffle (__b, __a, (uint8x16_t)
16653 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16654 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16655 #else
16656 return __builtin_shuffle (__a, __b, (uint8x16_t)
16657 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16658 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16659 #endif
16662 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16663 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
16665 __builtin_aarch64_im_lane_boundsi (__c, 8);
16666 #ifdef __AARCH64EB__
16667 return __builtin_shuffle (__b, __a, (uint16x8_t)
16668 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16669 #else
16670 return __builtin_shuffle (__a, __b,
16671 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16672 #endif
16675 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16676 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
16678 __builtin_aarch64_im_lane_boundsi (__c, 4);
16679 #ifdef __AARCH64EB__
16680 return __builtin_shuffle (__b, __a,
16681 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16682 #else
16683 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16684 #endif
16687 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16688 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
16690 __builtin_aarch64_im_lane_boundsi (__c, 2);
16691 #ifdef __AARCH64EB__
16692 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16693 #else
16694 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16695 #endif
16698 /* vfma */
16700 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16701 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16703 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16706 /* vfma_lane */
16708 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16709 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
16710 float32x2_t __c, const int __lane)
16712 return __builtin_aarch64_fmav2sf (__b,
16713 __aarch64_vdup_lane_f32 (__c, __lane),
16714 __a);
16717 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16718 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
16719 float64x1_t __c, const int __lane)
16721 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16724 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16725 vfmad_lane_f64 (float64_t __a, float64_t __b,
16726 float64x1_t __c, const int __lane)
16728 return __builtin_fma (__b, __c[0], __a);
16731 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16732 vfmas_lane_f32 (float32_t __a, float32_t __b,
16733 float32x2_t __c, const int __lane)
16735 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16738 /* vfma_laneq */
16740 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16741 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
16742 float32x4_t __c, const int __lane)
16744 return __builtin_aarch64_fmav2sf (__b,
16745 __aarch64_vdup_laneq_f32 (__c, __lane),
16746 __a);
16749 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16750 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
16751 float64x2_t __c, const int __lane)
16753 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16754 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
16757 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16758 vfmad_laneq_f64 (float64_t __a, float64_t __b,
16759 float64x2_t __c, const int __lane)
16761 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16764 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16765 vfmas_laneq_f32 (float32_t __a, float32_t __b,
16766 float32x4_t __c, const int __lane)
16768 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16771 /* vfmaq_lane */
16773 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16774 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
16775 float32x2_t __c, const int __lane)
16777 return __builtin_aarch64_fmav4sf (__b,
16778 __aarch64_vdupq_lane_f32 (__c, __lane),
16779 __a);
16782 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16783 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
16784 float64x1_t __c, const int __lane)
16786 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
16789 /* vfmaq_laneq */
16791 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16792 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16793 float32x4_t __c, const int __lane)
16795 return __builtin_aarch64_fmav4sf (__b,
16796 __aarch64_vdupq_laneq_f32 (__c, __lane),
16797 __a);
16800 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16801 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16802 float64x2_t __c, const int __lane)
16804 return __builtin_aarch64_fmav2df (__b,
16805 __aarch64_vdupq_laneq_f64 (__c, __lane),
16806 __a);
16809 /* vfms */
16811 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16812 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16814 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16817 /* vfms_lane */
16819 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16820 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
16821 float32x2_t __c, const int __lane)
16823 return __builtin_aarch64_fmav2sf (-__b,
16824 __aarch64_vdup_lane_f32 (__c, __lane),
16825 __a);
16828 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16829 vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
16830 float64x1_t __c, const int __lane)
16832 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16835 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16836 vfmsd_lane_f64 (float64_t __a, float64_t __b,
16837 float64x1_t __c, const int __lane)
16839 return __builtin_fma (-__b, __c[0], __a);
16842 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16843 vfmss_lane_f32 (float32_t __a, float32_t __b,
16844 float32x2_t __c, const int __lane)
16846 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16849 /* vfms_laneq */
16851 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16852 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
16853 float32x4_t __c, const int __lane)
16855 return __builtin_aarch64_fmav2sf (-__b,
16856 __aarch64_vdup_laneq_f32 (__c, __lane),
16857 __a);
16860 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16861 vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
16862 float64x2_t __c, const int __lane)
16864 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16865 return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
16868 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16869 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
16870 float64x2_t __c, const int __lane)
16872 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16875 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16876 vfmss_laneq_f32 (float32_t __a, float32_t __b,
16877 float32x4_t __c, const int __lane)
16879 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16882 /* vfmsq_lane */
16884 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16885 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
16886 float32x2_t __c, const int __lane)
16888 return __builtin_aarch64_fmav4sf (-__b,
16889 __aarch64_vdupq_lane_f32 (__c, __lane),
16890 __a);
16893 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16894 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
16895 float64x1_t __c, const int __lane)
16897 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
16900 /* vfmsq_laneq */
16902 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16903 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16904 float32x4_t __c, const int __lane)
16906 return __builtin_aarch64_fmav4sf (-__b,
16907 __aarch64_vdupq_laneq_f32 (__c, __lane),
16908 __a);
16911 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16912 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16913 float64x2_t __c, const int __lane)
16915 return __builtin_aarch64_fmav2df (-__b,
16916 __aarch64_vdupq_laneq_f64 (__c, __lane),
16917 __a);
16920 /* vld1 */
16922 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16923 vld1_f32 (const float32_t *a)
16925 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
16928 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16929 vld1_f64 (const float64_t *a)
16931 return (float64x1_t) {*a};
16934 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16935 vld1_p8 (const poly8_t *a)
16937 return (poly8x8_t)
16938 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16941 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16942 vld1_p16 (const poly16_t *a)
16944 return (poly16x4_t)
16945 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16948 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16949 vld1_s8 (const int8_t *a)
16951 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16954 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16955 vld1_s16 (const int16_t *a)
16957 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16960 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16961 vld1_s32 (const int32_t *a)
16963 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
16966 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16967 vld1_s64 (const int64_t *a)
16969 return (int64x1_t) {*a};
16972 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16973 vld1_u8 (const uint8_t *a)
16975 return (uint8x8_t)
16976 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16979 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16980 vld1_u16 (const uint16_t *a)
16982 return (uint16x4_t)
16983 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16986 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16987 vld1_u32 (const uint32_t *a)
16989 return (uint32x2_t)
16990 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
16993 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16994 vld1_u64 (const uint64_t *a)
16996 return (uint64x1_t) {*a};
16999 /* vld1q */
17001 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17002 vld1q_f32 (const float32_t *a)
17004 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
17007 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17008 vld1q_f64 (const float64_t *a)
17010 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
17013 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17014 vld1q_p8 (const poly8_t *a)
17016 return (poly8x16_t)
17017 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
17020 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17021 vld1q_p16 (const poly16_t *a)
17023 return (poly16x8_t)
17024 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
17027 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17028 vld1q_s8 (const int8_t *a)
17030 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
17033 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17034 vld1q_s16 (const int16_t *a)
17036 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
17039 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17040 vld1q_s32 (const int32_t *a)
17042 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
17045 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17046 vld1q_s64 (const int64_t *a)
17048 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
17051 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17052 vld1q_u8 (const uint8_t *a)
17054 return (uint8x16_t)
17055 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
17058 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17059 vld1q_u16 (const uint16_t *a)
17061 return (uint16x8_t)
17062 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
17065 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17066 vld1q_u32 (const uint32_t *a)
17068 return (uint32x4_t)
17069 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
17072 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17073 vld1q_u64 (const uint64_t *a)
17075 return (uint64x2_t)
17076 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
17079 /* vldn */
17081 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
17082 vld2_s64 (const int64_t * __a)
17084 int64x1x2_t ret;
17085 __builtin_aarch64_simd_oi __o;
17086 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
17087 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17088 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17089 return ret;
17092 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
17093 vld2_u64 (const uint64_t * __a)
17095 uint64x1x2_t ret;
17096 __builtin_aarch64_simd_oi __o;
17097 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
17098 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17099 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17100 return ret;
17103 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
17104 vld2_f64 (const float64_t * __a)
17106 float64x1x2_t ret;
17107 __builtin_aarch64_simd_oi __o;
17108 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
17109 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
17110 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
17111 return ret;
17114 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
17115 vld2_s8 (const int8_t * __a)
17117 int8x8x2_t ret;
17118 __builtin_aarch64_simd_oi __o;
17119 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17120 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17121 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17122 return ret;
17125 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
17126 vld2_p8 (const poly8_t * __a)
17128 poly8x8x2_t ret;
17129 __builtin_aarch64_simd_oi __o;
17130 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17131 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17132 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17133 return ret;
17136 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
17137 vld2_s16 (const int16_t * __a)
17139 int16x4x2_t ret;
17140 __builtin_aarch64_simd_oi __o;
17141 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17142 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17143 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17144 return ret;
17147 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
17148 vld2_p16 (const poly16_t * __a)
17150 poly16x4x2_t ret;
17151 __builtin_aarch64_simd_oi __o;
17152 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17153 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17154 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17155 return ret;
17158 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
17159 vld2_s32 (const int32_t * __a)
17161 int32x2x2_t ret;
17162 __builtin_aarch64_simd_oi __o;
17163 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
17164 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17165 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17166 return ret;
17169 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
17170 vld2_u8 (const uint8_t * __a)
17172 uint8x8x2_t ret;
17173 __builtin_aarch64_simd_oi __o;
17174 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17175 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17176 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17177 return ret;
17180 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
17181 vld2_u16 (const uint16_t * __a)
17183 uint16x4x2_t ret;
17184 __builtin_aarch64_simd_oi __o;
17185 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17186 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17187 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17188 return ret;
17191 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
17192 vld2_u32 (const uint32_t * __a)
17194 uint32x2x2_t ret;
17195 __builtin_aarch64_simd_oi __o;
17196 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
17197 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17198 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17199 return ret;
17202 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
17203 vld2_f32 (const float32_t * __a)
17205 float32x2x2_t ret;
17206 __builtin_aarch64_simd_oi __o;
17207 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
17208 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
17209 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
17210 return ret;
17213 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
17214 vld2q_s8 (const int8_t * __a)
17216 int8x16x2_t ret;
17217 __builtin_aarch64_simd_oi __o;
17218 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17219 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17220 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17221 return ret;
17224 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
17225 vld2q_p8 (const poly8_t * __a)
17227 poly8x16x2_t ret;
17228 __builtin_aarch64_simd_oi __o;
17229 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17230 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17231 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17232 return ret;
17235 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
17236 vld2q_s16 (const int16_t * __a)
17238 int16x8x2_t ret;
17239 __builtin_aarch64_simd_oi __o;
17240 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17241 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17242 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17243 return ret;
17246 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
17247 vld2q_p16 (const poly16_t * __a)
17249 poly16x8x2_t ret;
17250 __builtin_aarch64_simd_oi __o;
17251 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17252 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17253 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17254 return ret;
17257 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
17258 vld2q_s32 (const int32_t * __a)
17260 int32x4x2_t ret;
17261 __builtin_aarch64_simd_oi __o;
17262 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
17263 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17264 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17265 return ret;
17268 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
17269 vld2q_s64 (const int64_t * __a)
17271 int64x2x2_t ret;
17272 __builtin_aarch64_simd_oi __o;
17273 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
17274 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17275 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17276 return ret;
17279 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
17280 vld2q_u8 (const uint8_t * __a)
17282 uint8x16x2_t ret;
17283 __builtin_aarch64_simd_oi __o;
17284 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17285 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17286 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17287 return ret;
17290 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
17291 vld2q_u16 (const uint16_t * __a)
17293 uint16x8x2_t ret;
17294 __builtin_aarch64_simd_oi __o;
17295 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17296 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17297 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17298 return ret;
17301 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
17302 vld2q_u32 (const uint32_t * __a)
17304 uint32x4x2_t ret;
17305 __builtin_aarch64_simd_oi __o;
17306 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
17307 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17308 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17309 return ret;
17312 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
17313 vld2q_u64 (const uint64_t * __a)
17315 uint64x2x2_t ret;
17316 __builtin_aarch64_simd_oi __o;
17317 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
17318 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17319 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17320 return ret;
17323 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
17324 vld2q_f32 (const float32_t * __a)
17326 float32x4x2_t ret;
17327 __builtin_aarch64_simd_oi __o;
17328 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
17329 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
17330 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
17331 return ret;
17334 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
17335 vld2q_f64 (const float64_t * __a)
17337 float64x2x2_t ret;
17338 __builtin_aarch64_simd_oi __o;
17339 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
17340 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
17341 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
17342 return ret;
17345 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
17346 vld3_s64 (const int64_t * __a)
17348 int64x1x3_t ret;
17349 __builtin_aarch64_simd_ci __o;
17350 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
17351 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17352 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17353 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17354 return ret;
17357 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
17358 vld3_u64 (const uint64_t * __a)
17360 uint64x1x3_t ret;
17361 __builtin_aarch64_simd_ci __o;
17362 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
17363 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17364 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17365 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17366 return ret;
17369 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
17370 vld3_f64 (const float64_t * __a)
17372 float64x1x3_t ret;
17373 __builtin_aarch64_simd_ci __o;
17374 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
17375 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
17376 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
17377 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
17378 return ret;
17381 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
17382 vld3_s8 (const int8_t * __a)
17384 int8x8x3_t ret;
17385 __builtin_aarch64_simd_ci __o;
17386 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17387 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17388 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17389 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17390 return ret;
17393 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
17394 vld3_p8 (const poly8_t * __a)
17396 poly8x8x3_t ret;
17397 __builtin_aarch64_simd_ci __o;
17398 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17399 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17400 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17401 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17402 return ret;
17405 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
17406 vld3_s16 (const int16_t * __a)
17408 int16x4x3_t ret;
17409 __builtin_aarch64_simd_ci __o;
17410 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17411 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17412 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17413 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17414 return ret;
17417 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
17418 vld3_p16 (const poly16_t * __a)
17420 poly16x4x3_t ret;
17421 __builtin_aarch64_simd_ci __o;
17422 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17423 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17424 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17425 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17426 return ret;
17429 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
17430 vld3_s32 (const int32_t * __a)
17432 int32x2x3_t ret;
17433 __builtin_aarch64_simd_ci __o;
17434 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
17435 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17436 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17437 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17438 return ret;
17441 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
17442 vld3_u8 (const uint8_t * __a)
17444 uint8x8x3_t ret;
17445 __builtin_aarch64_simd_ci __o;
17446 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17447 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17448 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17449 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17450 return ret;
17453 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
17454 vld3_u16 (const uint16_t * __a)
17456 uint16x4x3_t ret;
17457 __builtin_aarch64_simd_ci __o;
17458 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17459 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17460 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17461 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17462 return ret;
17465 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
17466 vld3_u32 (const uint32_t * __a)
17468 uint32x2x3_t ret;
17469 __builtin_aarch64_simd_ci __o;
17470 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
17471 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17472 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17473 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17474 return ret;
17477 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
17478 vld3_f32 (const float32_t * __a)
17480 float32x2x3_t ret;
17481 __builtin_aarch64_simd_ci __o;
17482 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
17483 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
17484 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
17485 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
17486 return ret;
17489 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
17490 vld3q_s8 (const int8_t * __a)
17492 int8x16x3_t ret;
17493 __builtin_aarch64_simd_ci __o;
17494 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17495 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17496 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17497 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17498 return ret;
17501 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
17502 vld3q_p8 (const poly8_t * __a)
17504 poly8x16x3_t ret;
17505 __builtin_aarch64_simd_ci __o;
17506 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17507 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17508 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17509 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17510 return ret;
17513 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
17514 vld3q_s16 (const int16_t * __a)
17516 int16x8x3_t ret;
17517 __builtin_aarch64_simd_ci __o;
17518 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17519 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17520 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17521 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17522 return ret;
17525 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
17526 vld3q_p16 (const poly16_t * __a)
17528 poly16x8x3_t ret;
17529 __builtin_aarch64_simd_ci __o;
17530 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17531 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17532 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17533 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17534 return ret;
17537 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
17538 vld3q_s32 (const int32_t * __a)
17540 int32x4x3_t ret;
17541 __builtin_aarch64_simd_ci __o;
17542 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
17543 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17544 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17545 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17546 return ret;
17549 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
17550 vld3q_s64 (const int64_t * __a)
17552 int64x2x3_t ret;
17553 __builtin_aarch64_simd_ci __o;
17554 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
17555 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17556 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17557 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17558 return ret;
17561 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
17562 vld3q_u8 (const uint8_t * __a)
17564 uint8x16x3_t ret;
17565 __builtin_aarch64_simd_ci __o;
17566 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17567 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17568 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17569 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17570 return ret;
17573 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
17574 vld3q_u16 (const uint16_t * __a)
17576 uint16x8x3_t ret;
17577 __builtin_aarch64_simd_ci __o;
17578 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17579 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17580 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17581 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17582 return ret;
17585 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
17586 vld3q_u32 (const uint32_t * __a)
17588 uint32x4x3_t ret;
17589 __builtin_aarch64_simd_ci __o;
17590 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
17591 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17592 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17593 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17594 return ret;
17597 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
17598 vld3q_u64 (const uint64_t * __a)
17600 uint64x2x3_t ret;
17601 __builtin_aarch64_simd_ci __o;
17602 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
17603 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17604 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17605 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17606 return ret;
17609 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
17610 vld3q_f32 (const float32_t * __a)
17612 float32x4x3_t ret;
17613 __builtin_aarch64_simd_ci __o;
17614 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
17615 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
17616 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
17617 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
17618 return ret;
17621 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
17622 vld3q_f64 (const float64_t * __a)
17624 float64x2x3_t ret;
17625 __builtin_aarch64_simd_ci __o;
17626 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
17627 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
17628 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
17629 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
17630 return ret;
17633 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
17634 vld4_s64 (const int64_t * __a)
17636 int64x1x4_t ret;
17637 __builtin_aarch64_simd_xi __o;
17638 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17639 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17640 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17641 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17642 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17643 return ret;
17646 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
17647 vld4_u64 (const uint64_t * __a)
17649 uint64x1x4_t ret;
17650 __builtin_aarch64_simd_xi __o;
17651 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17652 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17653 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17654 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17655 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17656 return ret;
17659 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
17660 vld4_f64 (const float64_t * __a)
17662 float64x1x4_t ret;
17663 __builtin_aarch64_simd_xi __o;
17664 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
17665 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
17666 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
17667 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
17668 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
17669 return ret;
17672 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
17673 vld4_s8 (const int8_t * __a)
17675 int8x8x4_t ret;
17676 __builtin_aarch64_simd_xi __o;
17677 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17678 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17679 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17680 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17681 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17682 return ret;
17685 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
17686 vld4_p8 (const poly8_t * __a)
17688 poly8x8x4_t ret;
17689 __builtin_aarch64_simd_xi __o;
17690 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17691 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17692 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17693 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17694 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17695 return ret;
17698 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
17699 vld4_s16 (const int16_t * __a)
17701 int16x4x4_t ret;
17702 __builtin_aarch64_simd_xi __o;
17703 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17704 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17705 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17706 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17707 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17708 return ret;
17711 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
17712 vld4_p16 (const poly16_t * __a)
17714 poly16x4x4_t ret;
17715 __builtin_aarch64_simd_xi __o;
17716 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17717 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17718 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17719 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17720 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17721 return ret;
17724 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
17725 vld4_s32 (const int32_t * __a)
17727 int32x2x4_t ret;
17728 __builtin_aarch64_simd_xi __o;
17729 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17730 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17731 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17732 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17733 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17734 return ret;
17737 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
17738 vld4_u8 (const uint8_t * __a)
17740 uint8x8x4_t ret;
17741 __builtin_aarch64_simd_xi __o;
17742 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17743 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17744 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17745 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17746 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17747 return ret;
17750 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
17751 vld4_u16 (const uint16_t * __a)
17753 uint16x4x4_t ret;
17754 __builtin_aarch64_simd_xi __o;
17755 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17756 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17757 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17758 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17759 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17760 return ret;
17763 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
17764 vld4_u32 (const uint32_t * __a)
17766 uint32x2x4_t ret;
17767 __builtin_aarch64_simd_xi __o;
17768 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17769 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17770 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17771 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17772 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17773 return ret;
17776 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
17777 vld4_f32 (const float32_t * __a)
17779 float32x2x4_t ret;
17780 __builtin_aarch64_simd_xi __o;
17781 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
17782 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
17783 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
17784 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
17785 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
17786 return ret;
17789 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
17790 vld4q_s8 (const int8_t * __a)
17792 int8x16x4_t ret;
17793 __builtin_aarch64_simd_xi __o;
17794 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17795 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17796 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17797 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17798 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17799 return ret;
17802 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
17803 vld4q_p8 (const poly8_t * __a)
17805 poly8x16x4_t ret;
17806 __builtin_aarch64_simd_xi __o;
17807 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17808 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17809 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17810 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17811 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17812 return ret;
17815 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
17816 vld4q_s16 (const int16_t * __a)
17818 int16x8x4_t ret;
17819 __builtin_aarch64_simd_xi __o;
17820 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17821 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17822 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17823 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17824 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17825 return ret;
17828 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
17829 vld4q_p16 (const poly16_t * __a)
17831 poly16x8x4_t ret;
17832 __builtin_aarch64_simd_xi __o;
17833 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17834 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17835 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17836 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17837 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17838 return ret;
17841 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
17842 vld4q_s32 (const int32_t * __a)
17844 int32x4x4_t ret;
17845 __builtin_aarch64_simd_xi __o;
17846 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17847 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17848 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17849 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17850 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17851 return ret;
17854 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
17855 vld4q_s64 (const int64_t * __a)
17857 int64x2x4_t ret;
17858 __builtin_aarch64_simd_xi __o;
17859 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17860 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17861 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17862 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17863 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17864 return ret;
17867 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
17868 vld4q_u8 (const uint8_t * __a)
17870 uint8x16x4_t ret;
17871 __builtin_aarch64_simd_xi __o;
17872 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17873 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17874 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17875 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17876 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17877 return ret;
17880 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
17881 vld4q_u16 (const uint16_t * __a)
17883 uint16x8x4_t ret;
17884 __builtin_aarch64_simd_xi __o;
17885 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17886 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17887 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17888 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17889 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17890 return ret;
17893 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
17894 vld4q_u32 (const uint32_t * __a)
17896 uint32x4x4_t ret;
17897 __builtin_aarch64_simd_xi __o;
17898 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17899 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17900 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17901 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17902 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17903 return ret;
17906 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
17907 vld4q_u64 (const uint64_t * __a)
17909 uint64x2x4_t ret;
17910 __builtin_aarch64_simd_xi __o;
17911 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17912 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17913 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17914 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17915 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17916 return ret;
17919 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
17920 vld4q_f32 (const float32_t * __a)
17922 float32x4x4_t ret;
17923 __builtin_aarch64_simd_xi __o;
17924 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
17925 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
17926 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
17927 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
17928 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
17929 return ret;
17932 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
17933 vld4q_f64 (const float64_t * __a)
17935 float64x2x4_t ret;
17936 __builtin_aarch64_simd_xi __o;
17937 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
17938 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
17939 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
17940 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
17941 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
17942 return ret;
17945 /* vmax */
17947 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17948 vmax_f32 (float32x2_t __a, float32x2_t __b)
17950 return __builtin_aarch64_smax_nanv2sf (__a, __b);
17953 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17954 vmax_s8 (int8x8_t __a, int8x8_t __b)
17956 return __builtin_aarch64_smaxv8qi (__a, __b);
17959 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17960 vmax_s16 (int16x4_t __a, int16x4_t __b)
17962 return __builtin_aarch64_smaxv4hi (__a, __b);
17965 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17966 vmax_s32 (int32x2_t __a, int32x2_t __b)
17968 return __builtin_aarch64_smaxv2si (__a, __b);
17971 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17972 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
17974 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
17975 (int8x8_t) __b);
17978 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17979 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
17981 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
17982 (int16x4_t) __b);
17985 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17986 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
17988 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
17989 (int32x2_t) __b);
17992 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17993 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
17995 return __builtin_aarch64_smax_nanv4sf (__a, __b);
17998 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17999 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
18001 return __builtin_aarch64_smax_nanv2df (__a, __b);
18004 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18005 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
18007 return __builtin_aarch64_smaxv16qi (__a, __b);
18010 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18011 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
18013 return __builtin_aarch64_smaxv8hi (__a, __b);
18016 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18017 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
18019 return __builtin_aarch64_smaxv4si (__a, __b);
18022 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18023 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
18025 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
18026 (int8x16_t) __b);
18029 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18030 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
18032 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
18033 (int16x8_t) __b);
18036 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18037 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
18039 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
18040 (int32x4_t) __b);
18043 /* vmaxnm */
18045 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18046 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
18048 return __builtin_aarch64_smaxv2sf (__a, __b);
18051 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18052 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
18054 return __builtin_aarch64_smaxv4sf (__a, __b);
18057 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18058 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
18060 return __builtin_aarch64_smaxv2df (__a, __b);
18063 /* vmaxv */
18065 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18066 vmaxv_f32 (float32x2_t __a)
18068 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
18072 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18073 vmaxv_s8 (int8x8_t __a)
18075 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
18078 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18079 vmaxv_s16 (int16x4_t __a)
18081 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
18084 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18085 vmaxv_s32 (int32x2_t __a)
18087 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
18090 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18091 vmaxv_u8 (uint8x8_t __a)
18093 return vget_lane_u8 ((uint8x8_t)
18094 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
18098 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18099 vmaxv_u16 (uint16x4_t __a)
18101 return vget_lane_u16 ((uint16x4_t)
18102 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
18106 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18107 vmaxv_u32 (uint32x2_t __a)
18109 return vget_lane_u32 ((uint32x2_t)
18110 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
18114 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18115 vmaxvq_f32 (float32x4_t __a)
18117 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
18121 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18122 vmaxvq_f64 (float64x2_t __a)
18124 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
18128 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18129 vmaxvq_s8 (int8x16_t __a)
18131 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
18134 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18135 vmaxvq_s16 (int16x8_t __a)
18137 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
18140 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18141 vmaxvq_s32 (int32x4_t __a)
18143 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
18146 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18147 vmaxvq_u8 (uint8x16_t __a)
18149 return vgetq_lane_u8 ((uint8x16_t)
18150 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
18154 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18155 vmaxvq_u16 (uint16x8_t __a)
18157 return vgetq_lane_u16 ((uint16x8_t)
18158 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
18162 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18163 vmaxvq_u32 (uint32x4_t __a)
18165 return vgetq_lane_u32 ((uint32x4_t)
18166 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
18170 /* vmaxnmv */
18172 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18173 vmaxnmv_f32 (float32x2_t __a)
18175 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
18179 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18180 vmaxnmvq_f32 (float32x4_t __a)
18182 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
18185 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18186 vmaxnmvq_f64 (float64x2_t __a)
18188 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
18191 /* vmin */
18193 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18194 vmin_f32 (float32x2_t __a, float32x2_t __b)
18196 return __builtin_aarch64_smin_nanv2sf (__a, __b);
18199 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18200 vmin_s8 (int8x8_t __a, int8x8_t __b)
18202 return __builtin_aarch64_sminv8qi (__a, __b);
18205 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18206 vmin_s16 (int16x4_t __a, int16x4_t __b)
18208 return __builtin_aarch64_sminv4hi (__a, __b);
18211 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18212 vmin_s32 (int32x2_t __a, int32x2_t __b)
18214 return __builtin_aarch64_sminv2si (__a, __b);
18217 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18218 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
18220 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
18221 (int8x8_t) __b);
18224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18225 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
18227 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
18228 (int16x4_t) __b);
18231 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18232 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
18234 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
18235 (int32x2_t) __b);
18238 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18239 vminq_f32 (float32x4_t __a, float32x4_t __b)
18241 return __builtin_aarch64_smin_nanv4sf (__a, __b);
18244 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18245 vminq_f64 (float64x2_t __a, float64x2_t __b)
18247 return __builtin_aarch64_smin_nanv2df (__a, __b);
18250 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18251 vminq_s8 (int8x16_t __a, int8x16_t __b)
18253 return __builtin_aarch64_sminv16qi (__a, __b);
18256 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18257 vminq_s16 (int16x8_t __a, int16x8_t __b)
18259 return __builtin_aarch64_sminv8hi (__a, __b);
18262 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18263 vminq_s32 (int32x4_t __a, int32x4_t __b)
18265 return __builtin_aarch64_sminv4si (__a, __b);
18268 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18269 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
18271 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
18272 (int8x16_t) __b);
18275 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18276 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
18278 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
18279 (int16x8_t) __b);
18282 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18283 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
18285 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
18286 (int32x4_t) __b);
18289 /* vminnm */
18291 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18292 vminnm_f32 (float32x2_t __a, float32x2_t __b)
18294 return __builtin_aarch64_sminv2sf (__a, __b);
18297 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18298 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
18300 return __builtin_aarch64_sminv4sf (__a, __b);
18303 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18304 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
18306 return __builtin_aarch64_sminv2df (__a, __b);
18309 /* vminv */
18311 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18312 vminv_f32 (float32x2_t __a)
18314 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
18318 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18319 vminv_s8 (int8x8_t __a)
18321 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
18325 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18326 vminv_s16 (int16x4_t __a)
18328 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
18331 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18332 vminv_s32 (int32x2_t __a)
18334 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
18337 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18338 vminv_u8 (uint8x8_t __a)
18340 return vget_lane_u8 ((uint8x8_t)
18341 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
18345 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18346 vminv_u16 (uint16x4_t __a)
18348 return vget_lane_u16 ((uint16x4_t)
18349 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
18353 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18354 vminv_u32 (uint32x2_t __a)
18356 return vget_lane_u32 ((uint32x2_t)
18357 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
18361 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18362 vminvq_f32 (float32x4_t __a)
18364 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
18368 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18369 vminvq_f64 (float64x2_t __a)
18371 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
18375 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18376 vminvq_s8 (int8x16_t __a)
18378 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
18381 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18382 vminvq_s16 (int16x8_t __a)
18384 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
18387 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18388 vminvq_s32 (int32x4_t __a)
18390 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
18393 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18394 vminvq_u8 (uint8x16_t __a)
18396 return vgetq_lane_u8 ((uint8x16_t)
18397 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
18401 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18402 vminvq_u16 (uint16x8_t __a)
18404 return vgetq_lane_u16 ((uint16x8_t)
18405 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
18409 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18410 vminvq_u32 (uint32x4_t __a)
18412 return vgetq_lane_u32 ((uint32x4_t)
18413 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
18417 /* vminnmv */
18419 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18420 vminnmv_f32 (float32x2_t __a)
18422 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
18425 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18426 vminnmvq_f32 (float32x4_t __a)
18428 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
18431 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18432 vminnmvq_f64 (float64x2_t __a)
18434 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
18437 /* vmla */
18439 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18440 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18442 return a + b * c;
18445 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18446 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18448 return __a + __b * __c;
18451 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18452 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18454 return a + b * c;
18457 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18458 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18460 return a + b * c;
18463 /* vmla_lane */
18465 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18466 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
18467 float32x2_t __c, const int __lane)
18469 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18472 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18473 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
18474 int16x4_t __c, const int __lane)
18476 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18479 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18480 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
18481 int32x2_t __c, const int __lane)
18483 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18486 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18487 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18488 uint16x4_t __c, const int __lane)
18490 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18493 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18494 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18495 uint32x2_t __c, const int __lane)
18497 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18500 /* vmla_laneq */
18502 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18503 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
18504 float32x4_t __c, const int __lane)
18506 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18509 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18510 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
18511 int16x8_t __c, const int __lane)
18513 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18516 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18517 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
18518 int32x4_t __c, const int __lane)
18520 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18523 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18524 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18525 uint16x8_t __c, const int __lane)
18527 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18530 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18531 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18532 uint32x4_t __c, const int __lane)
18534 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18537 /* vmlaq_lane */
18539 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18540 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18541 float32x2_t __c, const int __lane)
18543 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18546 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18547 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
18548 int16x4_t __c, const int __lane)
18550 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18553 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18554 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
18555 int32x2_t __c, const int __lane)
18557 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18560 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18561 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18562 uint16x4_t __c, const int __lane)
18564 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18567 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18568 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18569 uint32x2_t __c, const int __lane)
18571 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18574 /* vmlaq_laneq */
18576 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18577 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18578 float32x4_t __c, const int __lane)
18580 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18583 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18584 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18585 int16x8_t __c, const int __lane)
18587 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18591 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18592 int32x4_t __c, const int __lane)
18594 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18597 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18598 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18599 uint16x8_t __c, const int __lane)
18601 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18604 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18605 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18606 uint32x4_t __c, const int __lane)
18608 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18611 /* vmls */
18613 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18614 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18616 return a - b * c;
18619 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18620 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18622 return __a - __b * __c;
18625 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18626 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18628 return a - b * c;
18631 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18632 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18634 return a - b * c;
18637 /* vmls_lane */
18639 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18640 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
18641 float32x2_t __c, const int __lane)
18643 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18647 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
18648 int16x4_t __c, const int __lane)
18650 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18653 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18654 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
18655 int32x2_t __c, const int __lane)
18657 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18660 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18661 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18662 uint16x4_t __c, const int __lane)
18664 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18667 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18668 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18669 uint32x2_t __c, const int __lane)
18671 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18674 /* vmls_laneq */
18676 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18677 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
18678 float32x4_t __c, const int __lane)
18680 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18683 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18684 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
18685 int16x8_t __c, const int __lane)
18687 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18690 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18691 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
18692 int32x4_t __c, const int __lane)
18694 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18697 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18698 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18699 uint16x8_t __c, const int __lane)
18701 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18704 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18705 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18706 uint32x4_t __c, const int __lane)
18708 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18711 /* vmlsq_lane */
18713 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18714 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18715 float32x2_t __c, const int __lane)
18717 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18720 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18721 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
18722 int16x4_t __c, const int __lane)
18724 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18727 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18728 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
18729 int32x2_t __c, const int __lane)
18731 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18734 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18735 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18736 uint16x4_t __c, const int __lane)
18738 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18741 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18742 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18743 uint32x2_t __c, const int __lane)
18745 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18748 /* vmlsq_laneq */
18750 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18751 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18752 float32x4_t __c, const int __lane)
18754 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18757 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18758 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18759 int16x8_t __c, const int __lane)
18761 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18764 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18765 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18766 int32x4_t __c, const int __lane)
18768 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18770 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18771 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18772 uint16x8_t __c, const int __lane)
18774 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18777 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18778 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18779 uint32x4_t __c, const int __lane)
18781 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18784 /* vmov_n_ */
18786 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18787 vmov_n_f32 (float32_t __a)
18789 return vdup_n_f32 (__a);
18792 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18793 vmov_n_f64 (float64_t __a)
18795 return (float64x1_t) {__a};
18798 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18799 vmov_n_p8 (poly8_t __a)
18801 return vdup_n_p8 (__a);
18804 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18805 vmov_n_p16 (poly16_t __a)
18807 return vdup_n_p16 (__a);
18810 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18811 vmov_n_s8 (int8_t __a)
18813 return vdup_n_s8 (__a);
18816 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18817 vmov_n_s16 (int16_t __a)
18819 return vdup_n_s16 (__a);
18822 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18823 vmov_n_s32 (int32_t __a)
18825 return vdup_n_s32 (__a);
18828 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18829 vmov_n_s64 (int64_t __a)
18831 return (int64x1_t) {__a};
18834 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18835 vmov_n_u8 (uint8_t __a)
18837 return vdup_n_u8 (__a);
18840 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18841 vmov_n_u16 (uint16_t __a)
18843 return vdup_n_u16 (__a);
18846 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18847 vmov_n_u32 (uint32_t __a)
18849 return vdup_n_u32 (__a);
18852 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18853 vmov_n_u64 (uint64_t __a)
18855 return (uint64x1_t) {__a};
18858 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18859 vmovq_n_f32 (float32_t __a)
18861 return vdupq_n_f32 (__a);
18864 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18865 vmovq_n_f64 (float64_t __a)
18867 return vdupq_n_f64 (__a);
18870 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18871 vmovq_n_p8 (poly8_t __a)
18873 return vdupq_n_p8 (__a);
18876 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18877 vmovq_n_p16 (poly16_t __a)
18879 return vdupq_n_p16 (__a);
18882 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18883 vmovq_n_s8 (int8_t __a)
18885 return vdupq_n_s8 (__a);
18888 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18889 vmovq_n_s16 (int16_t __a)
18891 return vdupq_n_s16 (__a);
18894 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18895 vmovq_n_s32 (int32_t __a)
18897 return vdupq_n_s32 (__a);
18900 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18901 vmovq_n_s64 (int64_t __a)
18903 return vdupq_n_s64 (__a);
18906 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18907 vmovq_n_u8 (uint8_t __a)
18909 return vdupq_n_u8 (__a);
18912 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18913 vmovq_n_u16 (uint16_t __a)
18915 return vdupq_n_u16 (__a);
18918 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18919 vmovq_n_u32 (uint32_t __a)
18921 return vdupq_n_u32 (__a);
18924 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18925 vmovq_n_u64 (uint64_t __a)
18927 return vdupq_n_u64 (__a);
18930 /* vmul_lane */
18932 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18933 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
18935 return __a * __aarch64_vget_lane_f32 (__b, __lane);
18938 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18939 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
18941 return __a * __b;
18944 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18945 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
18947 return __a * __aarch64_vget_lane_s16 (__b, __lane);
18950 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18951 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
18953 return __a * __aarch64_vget_lane_s32 (__b, __lane);
18956 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18957 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
18959 return __a * __aarch64_vget_lane_u16 (__b, __lane);
18962 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18963 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
18965 return __a * __aarch64_vget_lane_u32 (__b, __lane);
18968 /* vmuld_lane */
18970 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18971 vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
18973 return __a * vget_lane_f64 (__b, __lane);
18976 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18977 vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
18979 return __a * vgetq_lane_f64 (__b, __lane);
18982 /* vmuls_lane */
18984 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18985 vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
18987 return __a * vget_lane_f32 (__b, __lane);
18990 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18991 vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
18993 return __a * vgetq_lane_f32 (__b, __lane);
18996 /* vmul_laneq */
18998 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18999 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
19001 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
19004 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19005 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
19007 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
19010 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19011 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
19013 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
19016 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19017 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
19019 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
19022 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19023 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
19025 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
19028 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19029 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
19031 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
19034 /* vmul_n */
19036 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19037 vmul_n_f64 (float64x1_t __a, float64_t __b)
19039 return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
19042 /* vmulq_lane */
19044 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19045 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
19047 return __a * __aarch64_vget_lane_f32 (__b, __lane);
19050 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19051 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
19053 __builtin_aarch64_im_lane_boundsi (__lane, 1);
19054 return __a * __b[0];
19057 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19058 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
19060 return __a * __aarch64_vget_lane_s16 (__b, __lane);
19063 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19064 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
19066 return __a * __aarch64_vget_lane_s32 (__b, __lane);
19069 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19070 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
19072 return __a * __aarch64_vget_lane_u16 (__b, __lane);
19075 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19076 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
19078 return __a * __aarch64_vget_lane_u32 (__b, __lane);
/* vmulq_laneq */

/* Multiply every element of the Q-register vector __a by the element
   at position __lane of the Q-register vector __b (the "laneq"
   variants select from a 128-bit source).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
/* vneg */

/* Element-wise negation.  Expressed with the GNU vector-extension
   unary minus so the compiler selects NEG/FNEG directly.  Note the
   signed variants wrap on INT_MIN inputs (non-saturating); the
   saturating forms are the vqneg* intrinsics below.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vneg_f32 (float32x2_t __a)
{
  return -__a;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vneg_f64 (float64x1_t __a)
{
  return -__a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vneg_s8 (int8x8_t __a)
{
  return -__a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vneg_s16 (int16x4_t __a)
{
  return -__a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vneg_s32 (int32x2_t __a)
{
  return -__a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vneg_s64 (int64x1_t __a)
{
  return -__a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vnegq_f32 (float32x4_t __a)
{
  return -__a;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vnegq_f64 (float64x2_t __a)
{
  return -__a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vnegq_s8 (int8x16_t __a)
{
  return -__a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vnegq_s16 (int16x8_t __a)
{
  return -__a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vnegq_s32 (int32x4_t __a)
{
  return -__a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vnegq_s64 (int64x2_t __a)
{
  return -__a;
}
/* vpadd */

/* Pairwise add: adjacent element pairs of the concatenated operands
   are summed (ADDP).  The unsigned forms reuse the signed builtin and
   cast, which is safe because two's-complement addition is
   bit-identical for signed and unsigned operands.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_addpv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_addpv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_addpv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

/* Scalar pairwise add across a 128-bit vector: sum the two elements
   and return the scalar result.  The f64 form goes through the
   reduction builtin and extracts lane 0.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vpaddd_s64 (int64x2_t __a)
{
  return __builtin_aarch64_addpdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vpaddd_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_addpdi ((int64x2_t) __a);
}
/* vqabs */

/* Saturating absolute value (SQABS): |INT_MIN| saturates to INT_MAX
   instead of wrapping.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqabsq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqabsb_s8 (int8x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqabsh_s16 (int16x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqabshi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqabss_s32 (int32x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqabssi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqabsd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqabsdi (__a);
}
/* vqadd */

/* Scalar saturating add (SQADD/UQADD): results clamp to the type's
   min/max instead of wrapping.  The unsigned builtins carry a _uuu
   suffix encoding their unsigned operand/result signature.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqaddb_s8 (int8x1_t __a, int8x1_t __b)
{
  return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqaddh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqadds_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqaddd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqadddi_uuu (__a, __b);
}
/* vqdmlal */

/* Signed saturating doubling multiply-accumulate long (SQDMLAL):
   __a + saturate (2 * __b * __c), widening the product.  Variants:
   _high operates on the upper halves of Q-register inputs; _lane /
   _laneq multiply by one selected element of a 64-bit / 128-bit
   vector; _n multiplies by a scalar; the h/s-suffixed forms are the
   scalar 16->32 and 32->64 versions.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
{
  return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}

/* NOTE(review): this function and vqdmlals_lane_s32 use plain
   int32_t/int16_t/int64_t while their siblings above use the
   int32x1_t-style typedefs; the typedefs alias the same scalar types,
   so behavior is identical, but the naming is inconsistent.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
{
  return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
}
/* vqdmlsl */

/* Signed saturating doubling multiply-subtract long (SQDMLSL):
   __a - saturate (2 * __b * __c), widening the product.  Variant
   naming mirrors the vqdmlal family above (_high / _lane / _laneq /
   _n, plus scalar h/s forms).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

/* NOTE(review): the remaining scalar forms use plain int32_t/int16_t/
   int64_t rather than the x1 typedefs used above; the typedefs alias
   the same scalar types, so this is a naming inconsistency only.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
}
/* vqdmulh */

/* Signed saturating doubling multiply, returning the high half
   (SQDMULH): saturate (2 * __a * __b) >> element-width.  _lane /
   _laneq select one multiplier element; h/s forms are scalar.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
}
/* vqdmull */

/* Signed saturating doubling multiply long (SQDMULL):
   saturate (2 * __a * __b), widening 16->32 or 32->64.  _high uses
   the upper input halves; _lane/_laneq select one multiplier element;
   _n multiplies by a scalar; h/s forms are scalar.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqdmullsi (__a, __b);
}

/* NOTE(review): ACLE specifies vqdmulls_lane_s32 as returning a
   scalar int64_t, but this definition wraps the result in a
   one-element int64x1_t vector (inconsistent with vqdmulls_s32 just
   above) — confirm against the ACLE specification before relying on
   the return type.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_sqdmull_lanesi (__a, __b, __c)};
}
/* vqmovn */

/* Saturating narrow (SQXTN/UQXTN): each element is narrowed to half
   width, clamping values outside the destination range.  The h/s/d
   forms are scalar.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16x1_t __a)
{
  return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32x1_t __a)
{
  return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64_t __a)
{
  return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
}
/* vqmovun */

/* Saturating narrow, signed to unsigned (SQXTUN): negative inputs
   clamp to 0, inputs above the unsigned max clamp to that max.
   NOTE(review): the scalar h/s/d forms below return signed int*x1_t
   types, whereas ACLE defines vqmovunh_s16 etc. as returning unsigned
   scalars — confirm against the ACLE specification.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
}
/* vqneg */

/* Saturating negation (SQNEG): -INT_MIN saturates to INT_MAX instead
   of wrapping back to INT_MIN.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqnegd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqnegdi (__a);
}
/* vqrdmulh */

/* Signed saturating rounding doubling multiply, high half (SQRDMULH):
   like vqdmulh but with rounding before the high-half extraction.
   _lane / _laneq select one multiplier element; h/s forms are
   scalar.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
}
/* vqrshl */

/* Saturating rounding shift left (SQRSHL/UQRSHL): each element of __a
   is shifted by the corresponding signed element of __b (negative
   counts shift right with rounding), saturating on overflow.  The
   64x1 variants unpack to scalars and rebuild the one-element vector
   because the DI-mode builtins take scalar operands.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return __builtin_aarch64_uqrshlqi_uus (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return __builtin_aarch64_uqrshlhi_uus (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return __builtin_aarch64_uqrshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqrshldi_uus (__a, __b);
}
20146 /* vqrshrn */
20148 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20149 vqrshrn_n_s16 (int16x8_t __a, const int __b)
20151 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
20154 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20155 vqrshrn_n_s32 (int32x4_t __a, const int __b)
20157 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
20160 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20161 vqrshrn_n_s64 (int64x2_t __a, const int __b)
20163 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
20166 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20167 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
20169 return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
20172 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20173 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
20175 return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
20178 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20179 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
20181 return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
20184 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20185 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
20187 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
20190 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20191 vqrshrns_n_s32 (int32x1_t __a, const int __b)
20193 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
20196 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20197 vqrshrnd_n_s64 (int64_t __a, const int __b)
20199 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
20202 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20203 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
20205 return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
20208 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20209 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
20211 return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
20214 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20215 vqrshrnd_n_u64 (uint64_t __a, const int __b)
20217 return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
20220 /* vqrshrun */
20222 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20223 vqrshrun_n_s16 (int16x8_t __a, const int __b)
20225 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
20228 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20229 vqrshrun_n_s32 (int32x4_t __a, const int __b)
20231 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
20234 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20235 vqrshrun_n_s64 (int64x2_t __a, const int __b)
20237 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
20240 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20241 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
20243 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
20246 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20247 vqrshruns_n_s32 (int32x1_t __a, const int __b)
20249 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
20252 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20253 vqrshrund_n_s64 (int64_t __a, const int __b)
20255 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
20258 /* vqshl */
20260 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20261 vqshl_s8 (int8x8_t __a, int8x8_t __b)
20263 return __builtin_aarch64_sqshlv8qi (__a, __b);
20266 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20267 vqshl_s16 (int16x4_t __a, int16x4_t __b)
20269 return __builtin_aarch64_sqshlv4hi (__a, __b);
20272 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20273 vqshl_s32 (int32x2_t __a, int32x2_t __b)
20275 return __builtin_aarch64_sqshlv2si (__a, __b);
20278 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20279 vqshl_s64 (int64x1_t __a, int64x1_t __b)
20281 return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
20284 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20285 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
20287 return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
20290 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20291 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
20293 return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
20296 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20297 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
20299 return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
20302 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20303 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
20305 return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
20308 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20309 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
20311 return __builtin_aarch64_sqshlv16qi (__a, __b);
20314 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20315 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
20317 return __builtin_aarch64_sqshlv8hi (__a, __b);
20320 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20321 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
20323 return __builtin_aarch64_sqshlv4si (__a, __b);
20326 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20327 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
20329 return __builtin_aarch64_sqshlv2di (__a, __b);
20332 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20333 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
20335 return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
20338 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20339 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
20341 return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
20344 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20345 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
20347 return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
20350 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20351 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
20353 return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
20356 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20357 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
20359 return __builtin_aarch64_sqshlqi (__a, __b);
20362 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20363 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
20365 return __builtin_aarch64_sqshlhi (__a, __b);
20368 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20369 vqshls_s32 (int32x1_t __a, int32x1_t __b)
20371 return __builtin_aarch64_sqshlsi (__a, __b);
20374 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20375 vqshld_s64 (int64_t __a, int64_t __b)
20377 return __builtin_aarch64_sqshldi (__a, __b);
20380 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20381 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
20383 return __builtin_aarch64_uqshlqi_uus (__a, __b);
20386 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20387 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
20389 return __builtin_aarch64_uqshlhi_uus (__a, __b);
20392 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20393 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
20395 return __builtin_aarch64_uqshlsi_uus (__a, __b);
20398 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20399 vqshld_u64 (uint64_t __a, uint64_t __b)
20401 return __builtin_aarch64_uqshldi_uus (__a, __b);
20404 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20405 vqshl_n_s8 (int8x8_t __a, const int __b)
20407 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
20410 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20411 vqshl_n_s16 (int16x4_t __a, const int __b)
20413 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
20416 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20417 vqshl_n_s32 (int32x2_t __a, const int __b)
20419 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
20422 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20423 vqshl_n_s64 (int64x1_t __a, const int __b)
20425 return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
20428 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20429 vqshl_n_u8 (uint8x8_t __a, const int __b)
20431 return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
20434 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20435 vqshl_n_u16 (uint16x4_t __a, const int __b)
20437 return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
20440 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20441 vqshl_n_u32 (uint32x2_t __a, const int __b)
20443 return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
20446 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20447 vqshl_n_u64 (uint64x1_t __a, const int __b)
20449 return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
20452 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20453 vqshlq_n_s8 (int8x16_t __a, const int __b)
20455 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
20458 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20459 vqshlq_n_s16 (int16x8_t __a, const int __b)
20461 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
20464 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20465 vqshlq_n_s32 (int32x4_t __a, const int __b)
20467 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
20470 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20471 vqshlq_n_s64 (int64x2_t __a, const int __b)
20473 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
20476 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20477 vqshlq_n_u8 (uint8x16_t __a, const int __b)
20479 return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
20482 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20483 vqshlq_n_u16 (uint16x8_t __a, const int __b)
20485 return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
20488 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20489 vqshlq_n_u32 (uint32x4_t __a, const int __b)
20491 return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
20494 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20495 vqshlq_n_u64 (uint64x2_t __a, const int __b)
20497 return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
20500 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20501 vqshlb_n_s8 (int8x1_t __a, const int __b)
20503 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
20506 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20507 vqshlh_n_s16 (int16x1_t __a, const int __b)
20509 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
20512 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20513 vqshls_n_s32 (int32x1_t __a, const int __b)
20515 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
20518 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20519 vqshld_n_s64 (int64_t __a, const int __b)
20521 return __builtin_aarch64_sqshl_ndi (__a, __b);
20524 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20525 vqshlb_n_u8 (uint8x1_t __a, const int __b)
20527 return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
20530 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20531 vqshlh_n_u16 (uint16x1_t __a, const int __b)
20533 return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
20536 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20537 vqshls_n_u32 (uint32x1_t __a, const int __b)
20539 return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
20542 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20543 vqshld_n_u64 (uint64_t __a, const int __b)
20545 return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
20548 /* vqshlu */
20550 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20551 vqshlu_n_s8 (int8x8_t __a, const int __b)
20553 return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
20556 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20557 vqshlu_n_s16 (int16x4_t __a, const int __b)
20559 return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
20562 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20563 vqshlu_n_s32 (int32x2_t __a, const int __b)
20565 return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
20568 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20569 vqshlu_n_s64 (int64x1_t __a, const int __b)
20571 return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
20574 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20575 vqshluq_n_s8 (int8x16_t __a, const int __b)
20577 return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
20580 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20581 vqshluq_n_s16 (int16x8_t __a, const int __b)
20583 return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
20586 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20587 vqshluq_n_s32 (int32x4_t __a, const int __b)
20589 return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
20592 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20593 vqshluq_n_s64 (int64x2_t __a, const int __b)
20595 return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
20598 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20599 vqshlub_n_s8 (int8x1_t __a, const int __b)
20601 return (int8x1_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
20604 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20605 vqshluh_n_s16 (int16x1_t __a, const int __b)
20607 return (int16x1_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
20610 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20611 vqshlus_n_s32 (int32x1_t __a, const int __b)
20613 return (int32x1_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
20616 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20617 vqshlud_n_s64 (int64_t __a, const int __b)
20619 return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
20622 /* vqshrn */
20624 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20625 vqshrn_n_s16 (int16x8_t __a, const int __b)
20627 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
20630 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20631 vqshrn_n_s32 (int32x4_t __a, const int __b)
20633 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
20636 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20637 vqshrn_n_s64 (int64x2_t __a, const int __b)
20639 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
20642 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20643 vqshrn_n_u16 (uint16x8_t __a, const int __b)
20645 return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
20648 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20649 vqshrn_n_u32 (uint32x4_t __a, const int __b)
20651 return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
20654 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20655 vqshrn_n_u64 (uint64x2_t __a, const int __b)
20657 return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
20660 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20661 vqshrnh_n_s16 (int16x1_t __a, const int __b)
20663 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
20666 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20667 vqshrns_n_s32 (int32x1_t __a, const int __b)
20669 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
20672 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20673 vqshrnd_n_s64 (int64_t __a, const int __b)
20675 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
20678 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20679 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
20681 return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
20684 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20685 vqshrns_n_u32 (uint32x1_t __a, const int __b)
20687 return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
20690 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20691 vqshrnd_n_u64 (uint64_t __a, const int __b)
20693 return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
20696 /* vqshrun */
20698 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20699 vqshrun_n_s16 (int16x8_t __a, const int __b)
20701 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
20704 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20705 vqshrun_n_s32 (int32x4_t __a, const int __b)
20707 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
20710 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20711 vqshrun_n_s64 (int64x2_t __a, const int __b)
20713 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
20716 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20717 vqshrunh_n_s16 (int16x1_t __a, const int __b)
20719 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
20722 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20723 vqshruns_n_s32 (int32x1_t __a, const int __b)
20725 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
20728 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20729 vqshrund_n_s64 (int64_t __a, const int __b)
20731 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
20734 /* vqsub */
20736 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20737 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
20739 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
20742 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20743 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
20745 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
20748 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20749 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
20751 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
20754 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20755 vqsubd_s64 (int64_t __a, int64_t __b)
20757 return __builtin_aarch64_sqsubdi (__a, __b);
20760 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20761 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
20763 return (uint8x1_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
20766 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20767 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
20769 return (uint16x1_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
20772 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20773 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
20775 return (uint32x1_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
20778 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20779 vqsubd_u64 (uint64_t __a, uint64_t __b)
20781 return __builtin_aarch64_uqsubdi_uuu (__a, __b);
20784 /* vrecpe */
20786 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20787 vrecpes_f32 (float32_t __a)
20789 return __builtin_aarch64_frecpesf (__a);
20792 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20793 vrecped_f64 (float64_t __a)
20795 return __builtin_aarch64_frecpedf (__a);
20798 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20799 vrecpe_f32 (float32x2_t __a)
20801 return __builtin_aarch64_frecpev2sf (__a);
20804 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20805 vrecpeq_f32 (float32x4_t __a)
20807 return __builtin_aarch64_frecpev4sf (__a);
20810 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20811 vrecpeq_f64 (float64x2_t __a)
20813 return __builtin_aarch64_frecpev2df (__a);
20816 /* vrecps */
20818 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20819 vrecpss_f32 (float32_t __a, float32_t __b)
20821 return __builtin_aarch64_frecpssf (__a, __b);
20824 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20825 vrecpsd_f64 (float64_t __a, float64_t __b)
20827 return __builtin_aarch64_frecpsdf (__a, __b);
20830 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20831 vrecps_f32 (float32x2_t __a, float32x2_t __b)
20833 return __builtin_aarch64_frecpsv2sf (__a, __b);
20836 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20837 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
20839 return __builtin_aarch64_frecpsv4sf (__a, __b);
20842 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20843 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
20845 return __builtin_aarch64_frecpsv2df (__a, __b);
20848 /* vrecpx */
20850 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20851 vrecpxs_f32 (float32_t __a)
20853 return __builtin_aarch64_frecpxsf (__a);
20856 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20857 vrecpxd_f64 (float64_t __a)
20859 return __builtin_aarch64_frecpxdf (__a);
20863 /* vrev */
20865 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20866 vrev16_p8 (poly8x8_t a)
20868 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20871 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20872 vrev16_s8 (int8x8_t a)
20874 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20877 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20878 vrev16_u8 (uint8x8_t a)
20880 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20883 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20884 vrev16q_p8 (poly8x16_t a)
20886 return __builtin_shuffle (a,
20887 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20890 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20891 vrev16q_s8 (int8x16_t a)
20893 return __builtin_shuffle (a,
20894 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20897 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20898 vrev16q_u8 (uint8x16_t a)
20900 return __builtin_shuffle (a,
20901 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20904 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20905 vrev32_p8 (poly8x8_t a)
20907 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20910 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20911 vrev32_p16 (poly16x4_t a)
20913 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20916 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20917 vrev32_s8 (int8x8_t a)
20919 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20922 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20923 vrev32_s16 (int16x4_t a)
20925 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20928 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20929 vrev32_u8 (uint8x8_t a)
20931 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20934 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20935 vrev32_u16 (uint16x4_t a)
20937 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20940 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20941 vrev32q_p8 (poly8x16_t a)
20943 return __builtin_shuffle (a,
20944 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20947 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20948 vrev32q_p16 (poly16x8_t a)
20950 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20953 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20954 vrev32q_s8 (int8x16_t a)
20956 return __builtin_shuffle (a,
20957 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20960 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20961 vrev32q_s16 (int16x8_t a)
20963 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20966 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20967 vrev32q_u8 (uint8x16_t a)
20969 return __builtin_shuffle (a,
20970 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20973 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20974 vrev32q_u16 (uint16x8_t a)
20976 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20979 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20980 vrev64_f32 (float32x2_t a)
20982 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20985 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20986 vrev64_p8 (poly8x8_t a)
20988 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20991 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20992 vrev64_p16 (poly16x4_t a)
20994 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20997 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20998 vrev64_s8 (int8x8_t a)
21000 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21003 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21004 vrev64_s16 (int16x4_t a)
21006 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21009 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21010 vrev64_s32 (int32x2_t a)
21012 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21015 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21016 vrev64_u8 (uint8x8_t a)
21018 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21021 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21022 vrev64_u16 (uint16x4_t a)
21024 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21027 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21028 vrev64_u32 (uint32x2_t a)
21030 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21033 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21034 vrev64q_f32 (float32x4_t a)
21036 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21039 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21040 vrev64q_p8 (poly8x16_t a)
21042 return __builtin_shuffle (a,
21043 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21046 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21047 vrev64q_p16 (poly16x8_t a)
21049 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21052 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21053 vrev64q_s8 (int8x16_t a)
21055 return __builtin_shuffle (a,
21056 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21059 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21060 vrev64q_s16 (int16x8_t a)
21062 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21065 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21066 vrev64q_s32 (int32x4_t a)
21068 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21071 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21072 vrev64q_u8 (uint8x16_t a)
21074 return __builtin_shuffle (a,
21075 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21078 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21079 vrev64q_u16 (uint16x8_t a)
21081 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21084 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21085 vrev64q_u32 (uint32x4_t a)
21087 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
/* vrnd: round each lane toward zero (truncate).  The float64x1_t
   variants have no vector builtin of their own, so they extract the
   single lane, apply the scalar __builtin_* rounding function, and
   re-insert the result.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t __a)
{
  return __builtin_aarch64_btruncv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnd_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_btruncv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_btruncv2df (__a);
}

/* vrnda: round each lane to the nearest integer, ties away from zero
   (C round () semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t __a)
{
  return __builtin_aarch64_roundv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnda_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndaq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_roundv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndaq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_roundv2df (__a);
}

/* vrndi: round each lane using the current FP rounding mode without
   raising the inexact exception (C nearbyint () semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndi_f32 (float32x2_t __a)
{
  return __builtin_aarch64_nearbyintv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndi_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndiq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_nearbyintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndiq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_nearbyintv2df (__a);
}

/* vrndm: round each lane toward minus infinity (floor).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t __a)
{
  return __builtin_aarch64_floorv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndm_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndmq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_floorv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndmq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_floorv2df (__a);
}

/* vrndn: round each lane to the nearest integer, ties to even
   (AArch64 FRINTN).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frintnv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndn_f64 (float64x1_t __a)
{
  /* No vset/vget round-trip here: build the result vector directly
     from the scalar frintn builtin applied to lane 0.  */
  return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frintnv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndnq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frintnv2df (__a);
}

/* vrndp: round each lane toward plus infinity (ceil).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  return __builtin_aarch64_ceilv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndp_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_ceilv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndpq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_ceilv2df (__a);
}

/* vrndx: round each lane using the current FP rounding mode; unlike
   vrndi this may raise the inexact exception (C rint () semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  return __builtin_aarch64_rintv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndx_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_rintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndxq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_rintv2df (__a);
}
/* vrshl: rounding shift left by a per-lane signed count (SRSHL/URSHL).
   The count vector __b is always signed, even for the unsigned-data
   variants.  The builtin-name suffix (e.g. _uus) encodes the
   unsigned/signed signature GCC's builtin machinery expects
   (result, then each operand); where no such suffix exists the
   signed builtin is used and the result is cast back.  The 64x1
   variants unwrap lane 0, call the scalar DI-mode builtin, and
   rewrap the result in a one-element vector.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_urshlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_urshlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_urshlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_urshlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_urshlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_urshlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_urshlv2di_uus (__a, __b);
}

/* Scalar (d-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshld_u64 (uint64_t __a, int64_t __b)
{
  return __builtin_aarch64_urshldi_uus (__a, __b);
}
/* vrshr: rounding shift right by an immediate (SRSHR/URSHR).
   __b must be a compile-time constant; the builtin range-checks it.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
}

/* Scalar (d-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_urshr_ndi_uus (__a, __b);
}
/* vrsra: rounding shift right by immediate and accumulate
   (SRSRA/URSRA): __a + (__b rounding-shifted right by __c).
   __c must be a compile-time constant.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
}

/* Scalar (d-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
}
#ifdef __ARM_FEATURE_CRYPTO

/* vsha1: SHA-1 hash-update intrinsics, available only when the target
   implements the Crypto extension.  The c/m/p variants select the
   choose/majority/parity round functions respectively; hash_abcd holds
   working state A-D, hash_e the fifth word, and wk the schedule+key
   input for the round.  */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

/* SHA1H: fixed rotate of the E working value.  */
static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

/* SHA1SU0/SU1: SHA-1 message-schedule update steps.  */
static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

/* SHA-256 hash-update and message-schedule intrinsics.  */
static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

/* Polynomial (carry-less) 64x64 -> 128-bit multiply (PMULL/PMULL2).  */
static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return
    __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

/* As vmull_p64, but operating on the high halves of the inputs.  */
static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
/* vshl (immediate forms): shift each lane left by the constant __b.
   Only signed-input ashl builtins exist, so the unsigned variants cast
   their operand to the signed vector type and cast the result back;
   the bit pattern of a left shift is the same either way.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
}

/* Scalar (d-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_n_u64 (uint64_t __a, const int __b)
{
  return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
}
/* vshl (register forms): shift each lane by a per-lane signed count
   (SSHL/USHL).  As with vrshl, the count vector __b is signed even
   for the unsigned-data variants, and the 64x1 variants go through
   the scalar DI-mode builtin on lane 0.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_ushlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_ushlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_ushlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_ushlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_ushlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_ushlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_ushlv2di_uus (__a, __b);
}

/* Scalar (d-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_ushldi_uus (__a, __b);
}
/* vshll: shift left by immediate and widen each lane to double width
   (SSHLL/USHLL).  The _high variants operate on the upper half of a
   128-bit input (SSHLL2/USHLL2).  The unsigned _high forms cast
   through the signed vector types because only signed-typed ushll2
   builtins are provided; the narrower vshll_n_u* forms use the
   _uus-suffixed builtins directly.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
}
/* vshr: shift each lane right by the constant __b.  Signed variants
   use the arithmetic-shift (ashr) builtins, unsigned variants the
   logical-shift (lshr) builtins, casting through the signed vector
   types where the builtin signature requires it.  The 64-bit scalar
   paths go through the _simddi builtins, which accept the full
   immediate range including a shift of 64.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}

/* Scalar (d-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashr_simddi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_lshr_simddi_uus (__a, __b);
}
/* vsli: shift __b left by the constant __c and insert into __a (SLI);
   the low __c bits of each destination lane keep their original value
   from __a.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
}

/* Scalar (d-register) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
}
22189 /* vsqadd */
22191 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22192 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
22194 return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
22197 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22198 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
22200 return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
22203 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22204 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
22206 return __builtin_aarch64_usqaddv2si_uus (__a, __b);
22209 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22210 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
22212 return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
22215 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22216 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
22218 return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
22221 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22222 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
22224 return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
22227 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22228 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
22230 return __builtin_aarch64_usqaddv4si_uus (__a, __b);
22233 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22234 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
22236 return __builtin_aarch64_usqaddv2di_uus (__a, __b);
22239 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22240 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
22242 return __builtin_aarch64_usqaddqi_uus (__a, __b);
22245 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22246 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
22248 return __builtin_aarch64_usqaddhi_uus (__a, __b);
22251 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22252 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
22254 return __builtin_aarch64_usqaddsi_uus (__a, __b);
22257 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22258 vsqaddd_u64 (uint64_t __a, int64_t __b)
22260 return __builtin_aarch64_usqadddi_uus (__a, __b);
22263 /* vsqrt */
22264 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22265 vsqrt_f32 (float32x2_t a)
22267 return __builtin_aarch64_sqrtv2sf (a);
22270 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22271 vsqrtq_f32 (float32x4_t a)
22273 return __builtin_aarch64_sqrtv4sf (a);
22276 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22277 vsqrtq_f64 (float64x2_t a)
22279 return __builtin_aarch64_sqrtv2df (a);
22282 /* vsra */
22284 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22285 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22287 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
22290 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22291 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22293 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
22296 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22297 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22299 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
22302 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22303 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22305 return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
22308 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22309 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22311 return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
22314 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22315 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22317 return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
22320 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22321 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22323 return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
22326 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22327 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22329 return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
22332 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22333 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22335 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
22338 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22339 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22341 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
22344 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22345 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22347 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
22350 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22351 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22353 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
22356 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22357 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22359 return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
22362 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22363 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22365 return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
22368 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22369 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22371 return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
22374 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22375 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22377 return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
22380 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22381 vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
22383 return __builtin_aarch64_ssra_ndi (__a, __b, __c);
22386 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22387 vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22389 return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
22392 /* vsri */
22394 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22395 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22397 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
22400 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22401 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22403 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
22406 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22407 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22409 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
22412 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22413 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22415 return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
22418 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22419 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22421 return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
22424 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22425 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22427 return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
22430 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22431 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22433 return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
22436 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22437 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22439 return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
22442 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22443 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22445 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
22448 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22449 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22451 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
22454 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22455 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22457 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
22460 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22461 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22463 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
22466 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22467 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22469 return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
22472 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22473 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22475 return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
22478 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22479 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22481 return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
22484 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22485 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22487 return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
22490 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22491 vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
22493 return __builtin_aarch64_ssri_ndi (__a, __b, __c);
22496 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22497 vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22499 return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
22502 /* vst1 */
22504 __extension__ static __inline void __attribute__ ((__always_inline__))
22505 vst1_f32 (float32_t *a, float32x2_t b)
22507 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
22510 __extension__ static __inline void __attribute__ ((__always_inline__))
22511 vst1_f64 (float64_t *a, float64x1_t b)
22513 *a = b[0];
22516 __extension__ static __inline void __attribute__ ((__always_inline__))
22517 vst1_p8 (poly8_t *a, poly8x8_t b)
22519 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22520 (int8x8_t) b);
22523 __extension__ static __inline void __attribute__ ((__always_inline__))
22524 vst1_p16 (poly16_t *a, poly16x4_t b)
22526 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22527 (int16x4_t) b);
22530 __extension__ static __inline void __attribute__ ((__always_inline__))
22531 vst1_s8 (int8_t *a, int8x8_t b)
22533 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
22536 __extension__ static __inline void __attribute__ ((__always_inline__))
22537 vst1_s16 (int16_t *a, int16x4_t b)
22539 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
22542 __extension__ static __inline void __attribute__ ((__always_inline__))
22543 vst1_s32 (int32_t *a, int32x2_t b)
22545 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
22548 __extension__ static __inline void __attribute__ ((__always_inline__))
22549 vst1_s64 (int64_t *a, int64x1_t b)
22551 *a = b[0];
22554 __extension__ static __inline void __attribute__ ((__always_inline__))
22555 vst1_u8 (uint8_t *a, uint8x8_t b)
22557 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22558 (int8x8_t) b);
22561 __extension__ static __inline void __attribute__ ((__always_inline__))
22562 vst1_u16 (uint16_t *a, uint16x4_t b)
22564 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22565 (int16x4_t) b);
22568 __extension__ static __inline void __attribute__ ((__always_inline__))
22569 vst1_u32 (uint32_t *a, uint32x2_t b)
22571 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
22572 (int32x2_t) b);
22575 __extension__ static __inline void __attribute__ ((__always_inline__))
22576 vst1_u64 (uint64_t *a, uint64x1_t b)
22578 *a = b[0];
22581 __extension__ static __inline void __attribute__ ((__always_inline__))
22582 vst1q_f32 (float32_t *a, float32x4_t b)
22584 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
22587 __extension__ static __inline void __attribute__ ((__always_inline__))
22588 vst1q_f64 (float64_t *a, float64x2_t b)
22590 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
22593 /* vst1q */
22595 __extension__ static __inline void __attribute__ ((__always_inline__))
22596 vst1q_p8 (poly8_t *a, poly8x16_t b)
22598 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22599 (int8x16_t) b);
22602 __extension__ static __inline void __attribute__ ((__always_inline__))
22603 vst1q_p16 (poly16_t *a, poly16x8_t b)
22605 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22606 (int16x8_t) b);
22609 __extension__ static __inline void __attribute__ ((__always_inline__))
22610 vst1q_s8 (int8_t *a, int8x16_t b)
22612 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
22615 __extension__ static __inline void __attribute__ ((__always_inline__))
22616 vst1q_s16 (int16_t *a, int16x8_t b)
22618 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
22621 __extension__ static __inline void __attribute__ ((__always_inline__))
22622 vst1q_s32 (int32_t *a, int32x4_t b)
22624 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
22627 __extension__ static __inline void __attribute__ ((__always_inline__))
22628 vst1q_s64 (int64_t *a, int64x2_t b)
22630 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
22633 __extension__ static __inline void __attribute__ ((__always_inline__))
22634 vst1q_u8 (uint8_t *a, uint8x16_t b)
22636 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22637 (int8x16_t) b);
22640 __extension__ static __inline void __attribute__ ((__always_inline__))
22641 vst1q_u16 (uint16_t *a, uint16x8_t b)
22643 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22644 (int16x8_t) b);
22647 __extension__ static __inline void __attribute__ ((__always_inline__))
22648 vst1q_u32 (uint32_t *a, uint32x4_t b)
22650 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
22651 (int32x4_t) b);
22654 __extension__ static __inline void __attribute__ ((__always_inline__))
22655 vst1q_u64 (uint64_t *a, uint64x2_t b)
22657 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
22658 (int64x2_t) b);
22661 /* vstn */
22663 __extension__ static __inline void
22664 vst2_s64 (int64_t * __a, int64x1x2_t val)
22666 __builtin_aarch64_simd_oi __o;
22667 int64x2x2_t temp;
22668 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22669 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22670 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22671 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22672 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22675 __extension__ static __inline void
22676 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
22678 __builtin_aarch64_simd_oi __o;
22679 uint64x2x2_t temp;
22680 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22681 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22682 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22683 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22684 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22687 __extension__ static __inline void
22688 vst2_f64 (float64_t * __a, float64x1x2_t val)
22690 __builtin_aarch64_simd_oi __o;
22691 float64x2x2_t temp;
22692 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22693 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22694 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
22695 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
22696 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
22699 __extension__ static __inline void
22700 vst2_s8 (int8_t * __a, int8x8x2_t val)
22702 __builtin_aarch64_simd_oi __o;
22703 int8x16x2_t temp;
22704 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22705 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22706 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22707 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22708 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22711 __extension__ static __inline void __attribute__ ((__always_inline__))
22712 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
22714 __builtin_aarch64_simd_oi __o;
22715 poly8x16x2_t temp;
22716 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22717 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22718 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22719 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22720 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22723 __extension__ static __inline void __attribute__ ((__always_inline__))
22724 vst2_s16 (int16_t * __a, int16x4x2_t val)
22726 __builtin_aarch64_simd_oi __o;
22727 int16x8x2_t temp;
22728 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22729 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22730 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22731 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22732 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22735 __extension__ static __inline void __attribute__ ((__always_inline__))
22736 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
22738 __builtin_aarch64_simd_oi __o;
22739 poly16x8x2_t temp;
22740 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22741 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
22742 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22743 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22744 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22747 __extension__ static __inline void __attribute__ ((__always_inline__))
22748 vst2_s32 (int32_t * __a, int32x2x2_t val)
22750 __builtin_aarch64_simd_oi __o;
22751 int32x4x2_t temp;
22752 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
22753 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
22754 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22755 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22756 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22759 __extension__ static __inline void __attribute__ ((__always_inline__))
22760 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
22762 __builtin_aarch64_simd_oi __o;
22763 uint8x16x2_t temp;
22764 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
22765 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
22766 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22767 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22768 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22771 __extension__ static __inline void __attribute__ ((__always_inline__))
22772 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
22774 __builtin_aarch64_simd_oi __o;
22775 uint16x8x2_t temp;
22776 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
22777 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
22778 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22779 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22780 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22783 __extension__ static __inline void __attribute__ ((__always_inline__))
22784 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
22786 __builtin_aarch64_simd_oi __o;
22787 uint32x4x2_t temp;
22788 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
22789 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
22790 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22791 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22792 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22795 __extension__ static __inline void __attribute__ ((__always_inline__))
22796 vst2_f32 (float32_t * __a, float32x2x2_t val)
22798 __builtin_aarch64_simd_oi __o;
22799 float32x4x2_t temp;
22800 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
22801 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
22802 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
22803 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
22804 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
22807 __extension__ static __inline void __attribute__ ((__always_inline__))
22808 vst2q_s8 (int8_t * __a, int8x16x2_t val)
22810 __builtin_aarch64_simd_oi __o;
22811 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22812 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22813 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22816 __extension__ static __inline void __attribute__ ((__always_inline__))
22817 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
22819 __builtin_aarch64_simd_oi __o;
22820 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22821 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22822 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22825 __extension__ static __inline void __attribute__ ((__always_inline__))
22826 vst2q_s16 (int16_t * __a, int16x8x2_t val)
22828 __builtin_aarch64_simd_oi __o;
22829 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22830 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22831 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22834 __extension__ static __inline void __attribute__ ((__always_inline__))
22835 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
22837 __builtin_aarch64_simd_oi __o;
22838 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22839 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22840 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22843 __extension__ static __inline void __attribute__ ((__always_inline__))
22844 vst2q_s32 (int32_t * __a, int32x4x2_t val)
22846 __builtin_aarch64_simd_oi __o;
22847 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22848 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22849 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22852 __extension__ static __inline void __attribute__ ((__always_inline__))
22853 vst2q_s64 (int64_t * __a, int64x2x2_t val)
22855 __builtin_aarch64_simd_oi __o;
22856 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22857 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22858 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22861 __extension__ static __inline void __attribute__ ((__always_inline__))
22862 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
22864 __builtin_aarch64_simd_oi __o;
22865 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22866 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22867 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22870 __extension__ static __inline void __attribute__ ((__always_inline__))
22871 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
22873 __builtin_aarch64_simd_oi __o;
22874 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22875 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22876 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22879 __extension__ static __inline void __attribute__ ((__always_inline__))
22880 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
22882 __builtin_aarch64_simd_oi __o;
22883 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22884 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22885 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22888 __extension__ static __inline void __attribute__ ((__always_inline__))
22889 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
22891 __builtin_aarch64_simd_oi __o;
22892 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22893 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22894 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22897 __extension__ static __inline void __attribute__ ((__always_inline__))
22898 vst2q_f32 (float32_t * __a, float32x4x2_t val)
22900 __builtin_aarch64_simd_oi __o;
22901 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
22902 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
22903 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
22906 __extension__ static __inline void __attribute__ ((__always_inline__))
22907 vst2q_f64 (float64_t * __a, float64x2x2_t val)
22909 __builtin_aarch64_simd_oi __o;
22910 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
22911 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
22912 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
22915 __extension__ static __inline void
22916 vst3_s64 (int64_t * __a, int64x1x3_t val)
22918 __builtin_aarch64_simd_ci __o;
22919 int64x2x3_t temp;
22920 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22921 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22922 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
22923 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22924 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22925 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22926 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22929 __extension__ static __inline void
22930 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
22932 __builtin_aarch64_simd_ci __o;
22933 uint64x2x3_t temp;
22934 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22935 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22936 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
22937 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22938 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22939 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22940 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22943 __extension__ static __inline void
22944 vst3_f64 (float64_t * __a, float64x1x3_t val)
22946 __builtin_aarch64_simd_ci __o;
22947 float64x2x3_t temp;
22948 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22949 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22950 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
22951 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
22952 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
22953 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
22954 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
22957 __extension__ static __inline void
22958 vst3_s8 (int8_t * __a, int8x8x3_t val)
22960 __builtin_aarch64_simd_ci __o;
22961 int8x16x3_t temp;
22962 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22963 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22964 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
22965 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22966 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22967 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22968 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22971 __extension__ static __inline void __attribute__ ((__always_inline__))
22972 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
22974 __builtin_aarch64_simd_ci __o;
22975 poly8x16x3_t temp;
22976 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22977 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22978 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
22979 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22980 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22981 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22982 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22985 __extension__ static __inline void __attribute__ ((__always_inline__))
22986 vst3_s16 (int16_t * __a, int16x4x3_t val)
22988 __builtin_aarch64_simd_ci __o;
22989 int16x8x3_t temp;
22990 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22991 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22992 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
22993 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22994 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22995 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22996 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22999 __extension__ static __inline void __attribute__ ((__always_inline__))
23000 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
23002 __builtin_aarch64_simd_ci __o;
23003 poly16x8x3_t temp;
23004 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23005 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23006 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23007 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23008 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23009 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23010 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23013 __extension__ static __inline void __attribute__ ((__always_inline__))
23014 vst3_s32 (int32_t * __a, int32x2x3_t val)
23016 __builtin_aarch64_simd_ci __o;
23017 int32x4x3_t temp;
23018 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23019 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23020 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23021 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23022 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23023 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23024 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23027 __extension__ static __inline void __attribute__ ((__always_inline__))
23028 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
23030 __builtin_aarch64_simd_ci __o;
23031 uint8x16x3_t temp;
23032 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23033 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23034 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23035 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23036 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23037 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23038 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23041 __extension__ static __inline void __attribute__ ((__always_inline__))
23042 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
23044 __builtin_aarch64_simd_ci __o;
23045 uint16x8x3_t temp;
23046 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23047 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23048 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23049 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23050 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23051 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23052 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23055 __extension__ static __inline void __attribute__ ((__always_inline__))
23056 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
23058 __builtin_aarch64_simd_ci __o;
23059 uint32x4x3_t temp;
23060 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23061 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23062 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23063 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23064 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23065 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23066 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23069 __extension__ static __inline void __attribute__ ((__always_inline__))
23070 vst3_f32 (float32_t * __a, float32x2x3_t val)
23072 __builtin_aarch64_simd_ci __o;
23073 float32x4x3_t temp;
23074 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23075 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23076 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23077 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
23078 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
23079 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23080 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23083 __extension__ static __inline void __attribute__ ((__always_inline__))
23084 vst3q_s8 (int8_t * __a, int8x16x3_t val)
23086 __builtin_aarch64_simd_ci __o;
23087 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23088 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23089 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23090 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23093 __extension__ static __inline void __attribute__ ((__always_inline__))
23094 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
23096 __builtin_aarch64_simd_ci __o;
23097 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23098 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23099 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23100 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23103 __extension__ static __inline void __attribute__ ((__always_inline__))
23104 vst3q_s16 (int16_t * __a, int16x8x3_t val)
23106 __builtin_aarch64_simd_ci __o;
23107 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23108 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23109 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23110 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23113 __extension__ static __inline void __attribute__ ((__always_inline__))
23114 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
23116 __builtin_aarch64_simd_ci __o;
23117 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23118 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23119 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23120 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23123 __extension__ static __inline void __attribute__ ((__always_inline__))
23124 vst3q_s32 (int32_t * __a, int32x4x3_t val)
23126 __builtin_aarch64_simd_ci __o;
23127 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23128 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23129 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23130 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23133 __extension__ static __inline void __attribute__ ((__always_inline__))
23134 vst3q_s64 (int64_t * __a, int64x2x3_t val)
23136 __builtin_aarch64_simd_ci __o;
23137 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23138 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23139 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23140 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23143 __extension__ static __inline void __attribute__ ((__always_inline__))
23144 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
23146 __builtin_aarch64_simd_ci __o;
23147 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23148 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23149 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23150 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23153 __extension__ static __inline void __attribute__ ((__always_inline__))
23154 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
23156 __builtin_aarch64_simd_ci __o;
23157 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23158 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23159 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23160 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23163 __extension__ static __inline void __attribute__ ((__always_inline__))
23164 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
23166 __builtin_aarch64_simd_ci __o;
23167 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23168 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23169 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23170 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23173 __extension__ static __inline void __attribute__ ((__always_inline__))
23174 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
23176 __builtin_aarch64_simd_ci __o;
23177 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23178 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23179 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23180 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23183 __extension__ static __inline void __attribute__ ((__always_inline__))
23184 vst3q_f32 (float32_t * __a, float32x4x3_t val)
23186 __builtin_aarch64_simd_ci __o;
23187 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
23188 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
23189 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
23190 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23193 __extension__ static __inline void __attribute__ ((__always_inline__))
23194 vst3q_f64 (float64_t * __a, float64x2x3_t val)
23196 __builtin_aarch64_simd_ci __o;
23197 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
23198 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
23199 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
23200 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
23203 __extension__ static __inline void
23204 vst4_s64 (int64_t * __a, int64x1x4_t val)
23206 __builtin_aarch64_simd_xi __o;
23207 int64x2x4_t temp;
23208 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23209 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23210 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23211 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23212 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23213 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23214 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23215 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23216 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23219 __extension__ static __inline void
23220 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23222 __builtin_aarch64_simd_xi __o;
23223 uint64x2x4_t temp;
23224 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23225 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23226 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23227 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23228 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23229 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23230 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23231 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23232 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23235 __extension__ static __inline void
23236 vst4_f64 (float64_t * __a, float64x1x4_t val)
23238 __builtin_aarch64_simd_xi __o;
23239 float64x2x4_t temp;
23240 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23241 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23242 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23243 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23244 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23245 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23246 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23247 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23248 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
23251 __extension__ static __inline void
23252 vst4_s8 (int8_t * __a, int8x8x4_t val)
23254 __builtin_aarch64_simd_xi __o;
23255 int8x16x4_t temp;
23256 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23257 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23258 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23259 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23260 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23261 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23262 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23263 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23264 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23267 __extension__ static __inline void __attribute__ ((__always_inline__))
23268 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
23270 __builtin_aarch64_simd_xi __o;
23271 poly8x16x4_t temp;
23272 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23273 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23274 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23275 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
23276 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23277 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23278 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23279 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23280 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23283 __extension__ static __inline void __attribute__ ((__always_inline__))
23284 vst4_s16 (int16_t * __a, int16x4x4_t val)
23286 __builtin_aarch64_simd_xi __o;
23287 int16x8x4_t temp;
23288 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23289 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23290 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23291 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
23292 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23293 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23294 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23295 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23296 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23299 __extension__ static __inline void __attribute__ ((__always_inline__))
23300 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
23302 __builtin_aarch64_simd_xi __o;
23303 poly16x8x4_t temp;
23304 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23305 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23306 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23307 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
23308 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23309 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23310 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23311 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23312 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23315 __extension__ static __inline void __attribute__ ((__always_inline__))
23316 vst4_s32 (int32_t * __a, int32x2x4_t val)
23318 __builtin_aarch64_simd_xi __o;
23319 int32x4x4_t temp;
23320 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23321 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23322 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23323 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
23324 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23325 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23326 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23327 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23328 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23331 __extension__ static __inline void __attribute__ ((__always_inline__))
23332 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
23334 __builtin_aarch64_simd_xi __o;
23335 uint8x16x4_t temp;
23336 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23337 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23338 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23339 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
23340 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23341 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23342 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23343 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23344 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23347 __extension__ static __inline void __attribute__ ((__always_inline__))
23348 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
23350 __builtin_aarch64_simd_xi __o;
23351 uint16x8x4_t temp;
23352 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23353 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23354 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23355 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
23356 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23357 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23358 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23359 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23360 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23363 __extension__ static __inline void __attribute__ ((__always_inline__))
23364 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
23366 __builtin_aarch64_simd_xi __o;
23367 uint32x4x4_t temp;
23368 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23369 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23370 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23371 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
23372 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23373 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23374 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23375 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23376 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23379 __extension__ static __inline void __attribute__ ((__always_inline__))
23380 vst4_f32 (float32_t * __a, float32x2x4_t val)
23382 __builtin_aarch64_simd_xi __o;
23383 float32x4x4_t temp;
23384 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23385 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23386 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23387 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
23388 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
23389 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
23390 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
23391 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
23392 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23395 __extension__ static __inline void __attribute__ ((__always_inline__))
23396 vst4q_s8 (int8_t * __a, int8x16x4_t val)
23398 __builtin_aarch64_simd_xi __o;
23399 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23400 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23401 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23402 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23403 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23406 __extension__ static __inline void __attribute__ ((__always_inline__))
23407 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
23409 __builtin_aarch64_simd_xi __o;
23410 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23411 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23412 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23413 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23414 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23417 __extension__ static __inline void __attribute__ ((__always_inline__))
23418 vst4q_s16 (int16_t * __a, int16x8x4_t val)
23420 __builtin_aarch64_simd_xi __o;
23421 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23422 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23423 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23424 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23425 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23428 __extension__ static __inline void __attribute__ ((__always_inline__))
23429 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
23431 __builtin_aarch64_simd_xi __o;
23432 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23433 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23434 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23435 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23436 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23439 __extension__ static __inline void __attribute__ ((__always_inline__))
23440 vst4q_s32 (int32_t * __a, int32x4x4_t val)
23442 __builtin_aarch64_simd_xi __o;
23443 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23444 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23445 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23446 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23447 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
23450 __extension__ static __inline void __attribute__ ((__always_inline__))
23451 vst4q_s64 (int64_t * __a, int64x2x4_t val)
23453 __builtin_aarch64_simd_xi __o;
23454 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23455 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23456 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23457 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23458 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
23461 __extension__ static __inline void __attribute__ ((__always_inline__))
23462 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
23464 __builtin_aarch64_simd_xi __o;
23465 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23466 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23467 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23468 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23469 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23472 __extension__ static __inline void __attribute__ ((__always_inline__))
23473 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
23475 __builtin_aarch64_simd_xi __o;
23476 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23477 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23478 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23479 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23480 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23483 __extension__ static __inline void __attribute__ ((__always_inline__))
23484 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
23486 __builtin_aarch64_simd_xi __o;
23487 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23488 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23489 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23490 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23491 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
23494 __extension__ static __inline void __attribute__ ((__always_inline__))
23495 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
23497 __builtin_aarch64_simd_xi __o;
23498 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23499 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23500 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23501 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23502 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
23505 __extension__ static __inline void __attribute__ ((__always_inline__))
23506 vst4q_f32 (float32_t * __a, float32x4x4_t val)
23508 __builtin_aarch64_simd_xi __o;
23509 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
23510 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
23511 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
23512 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
23513 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23516 __extension__ static __inline void __attribute__ ((__always_inline__))
23517 vst4q_f64 (float64_t * __a, float64x2x4_t val)
23519 __builtin_aarch64_simd_xi __o;
23520 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
23521 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
23522 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
23523 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
23524 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
/* vsub */
/* Scalar subtraction of two signed 64-bit values.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsubd_s64 (int64_t __a, int64_t __b)
{
  int64_t __difference = __a - __b;
  return __difference;
}
/* Scalar subtraction of two unsigned 64-bit values (wraps modulo 2^64).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64_t __a, uint64_t __b)
{
  uint64_t __difference = __a - __b;
  return __difference;
}
/* vtbx1 */
23543 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23544 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
23546 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
23547 vmov_n_u8 (8));
23548 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
23550 return vbsl_s8 (__mask, __tbl, __r);
23553 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23554 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
23556 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
23557 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
23559 return vbsl_u8 (__mask, __tbl, __r);
23562 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23563 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
23565 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
23566 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
23568 return vbsl_p8 (__mask, __tbl, __r);
/* vtbx3 */
23573 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23574 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
23576 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
23577 vmov_n_u8 (24));
23578 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
23580 return vbsl_s8 (__mask, __tbl, __r);
23583 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23584 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
23586 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
23587 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
23589 return vbsl_u8 (__mask, __tbl, __r);
23592 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23593 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
23595 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
23596 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
23598 return vbsl_p8 (__mask, __tbl, __r);
/* vtrn */
23603 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23604 vtrn1_f32 (float32x2_t __a, float32x2_t __b)
23606 #ifdef __AARCH64EB__
23607 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23608 #else
23609 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23610 #endif
23613 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23614 vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
23616 #ifdef __AARCH64EB__
23617 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23618 #else
23619 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23620 #endif
23623 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
23624 vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
23626 #ifdef __AARCH64EB__
23627 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23628 #else
23629 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23630 #endif
23633 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23634 vtrn1_s8 (int8x8_t __a, int8x8_t __b)
23636 #ifdef __AARCH64EB__
23637 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23638 #else
23639 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23640 #endif
23643 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23644 vtrn1_s16 (int16x4_t __a, int16x4_t __b)
23646 #ifdef __AARCH64EB__
23647 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23648 #else
23649 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23650 #endif
23653 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23654 vtrn1_s32 (int32x2_t __a, int32x2_t __b)
23656 #ifdef __AARCH64EB__
23657 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23658 #else
23659 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23660 #endif
23663 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23664 vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
23666 #ifdef __AARCH64EB__
23667 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23668 #else
23669 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23670 #endif
23673 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23674 vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
23676 #ifdef __AARCH64EB__
23677 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23678 #else
23679 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23680 #endif
23683 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23684 vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
23686 #ifdef __AARCH64EB__
23687 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23688 #else
23689 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23690 #endif
23693 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23694 vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
23696 #ifdef __AARCH64EB__
23697 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23698 #else
23699 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23700 #endif
23703 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23704 vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
23706 #ifdef __AARCH64EB__
23707 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23708 #else
23709 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23710 #endif
23713 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
23714 vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
23716 #ifdef __AARCH64EB__
23717 return __builtin_shuffle (__a, __b,
23718 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23719 #else
23720 return __builtin_shuffle (__a, __b,
23721 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23722 #endif
23725 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
23726 vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
23728 #ifdef __AARCH64EB__
23729 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23730 #else
23731 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23732 #endif
23735 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23736 vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
23738 #ifdef __AARCH64EB__
23739 return __builtin_shuffle (__a, __b,
23740 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23741 #else
23742 return __builtin_shuffle (__a, __b,
23743 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23744 #endif
23747 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23748 vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
23750 #ifdef __AARCH64EB__
23751 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23752 #else
23753 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23754 #endif
23757 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23758 vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
23760 #ifdef __AARCH64EB__
23761 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23762 #else
23763 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23764 #endif
23767 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23768 vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
23770 #ifdef __AARCH64EB__
23771 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23772 #else
23773 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23774 #endif
23777 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23778 vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
23780 #ifdef __AARCH64EB__
23781 return __builtin_shuffle (__a, __b,
23782 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23783 #else
23784 return __builtin_shuffle (__a, __b,
23785 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23786 #endif
23789 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23790 vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
23792 #ifdef __AARCH64EB__
23793 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23794 #else
23795 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23796 #endif
23799 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23800 vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
23802 #ifdef __AARCH64EB__
23803 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23804 #else
23805 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23806 #endif
23809 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23810 vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
23812 #ifdef __AARCH64EB__
23813 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23814 #else
23815 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23816 #endif
/* vtrn2: transpose-interleave the odd-numbered lanes of __a and __b
   (architecturally: result lane 2i = __a lane 2i+1, lane 2i+1 = __b
   lane 2i+1).  Big-endian shuffle indices differ because GCC's
   __builtin_shuffle lane numbering is reversed on __AARCH64EB__.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}
/* Combined vtrn/vtrnq: return both transpose halves as a two-vector
   struct, {vtrn1 result, vtrn2 result}, matching the ARM C Language
   Extensions vtrn intrinsics.  */

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t a, float32x2_t b)
{
  return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8 (poly8x8_t a, poly8x8_t b)
{
  return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16 (poly16x4_t a, poly16x4_t b)
{
  return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8 (int8x8_t a, int8x8_t b)
{
  return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16 (int16x4_t a, int16x4_t b)
{
  return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32 (int32x2_t a, int32x2_t b)
{
  return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8 (uint8x8_t a, uint8x8_t b)
{
  return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16 (uint16x4_t a, uint16x4_t b)
{
  return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32 (uint32x2_t a, uint32x2_t b)
{
  return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
}

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32 (float32x4_t a, float32x4_t b)
{
  return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8 (poly8x16_t a, poly8x16_t b)
{
  return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16 (poly16x8_t a, poly16x8_t b)
{
  return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
}

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8 (int8x16_t a, int8x16_t b)
{
  return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16 (int16x8_t a, int16x8_t b)
{
  return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32 (int32x4_t a, int32x4_t b)
{
  return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8 (uint8x16_t a, uint8x16_t b)
{
  return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16 (uint16x8_t a, uint16x8_t b)
{
  return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32 (uint32x4_t a, uint32x4_t b)
{
  return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
}
/* vtst */

/* Bitwise test: each result lane is all-ones when (__a & __b) is
   non-zero in that lane, all-zeros otherwise (CMTST instruction).
   The 64x1 and scalar forms are open-coded since a vector builtin is
   not used for them here.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
  /* The builtin is declared on signed vectors; casts don't change bits.  */
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_s64 (int64_t __a, int64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64_t __a, uint64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}
/* vuqadd */

/* Signed saturating accumulate of an unsigned value (SUQADD): add the
   unsigned __b to the signed __a, saturating the result to the signed
   range.  The _ssu builtin suffix encodes the signed/signed/unsigned
   operand types.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  /* 64x1 vectors go through the scalar DI builtin on lane 0.  */
  return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
}

/* Scalar forms.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
{
  return __builtin_aarch64_suqaddqi_ssu (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
{
  return __builtin_aarch64_suqaddhi_ssu (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
{
  return __builtin_aarch64_suqaddsi_ssu (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64_t __a, uint64_t __b)
{
  return __builtin_aarch64_suqadddi_ssu (__a, __b);
}
/* __DEFINTERLEAVE: define the combined two-result interleave intrinsic
   v<op><Q>_<funcsuffix> (e.g. vuzp_s8, vzipq_u32) in terms of the
   corresponding v<op>1 and v<op>2 single-result intrinsics, returning
   both halves as an x2 struct.  __INTERLEAVE_LIST instantiates it for
   every supported element type in both 64-bit and 128-bit (Q) forms.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) 	\
  __extension__ static __inline rettype				\
  __attribute__ ((__always_inline__))				\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)		\
  {								\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }

#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
/* vuzp */

/* vuzp1: unzip, keeping the even-numbered lanes of the concatenation
   of __a and __b (architecturally: all even lanes of __a followed by
   all even lanes of __b).  Big-endian shuffle indices differ because
   GCC's __builtin_shuffle lane numbering is reversed on __AARCH64EB__.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vuzp2: unzip, keeping the odd-numbered lanes of the concatenation of
   __a and __b.  Big-endian shuffle indices differ because GCC's
   __builtin_shuffle lane numbering is reversed on __AARCH64EB__.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
24651 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24652 vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
24654 #ifdef __AARCH64EB__
24655 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
24656 #else
24657 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
24658 #endif
24661 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24662 vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
24664 #ifdef __AARCH64EB__
24665 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24666 #else
24667 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24668 #endif
24671 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24672 vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
24674 #ifdef __AARCH64EB__
24675 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24676 #else
24677 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24678 #endif
24681 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24682 vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
24684 #ifdef __AARCH64EB__
24685 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24686 #else
24687 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24688 #endif
24691 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24692 vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
24694 #ifdef __AARCH64EB__
24695 return __builtin_shuffle (__a, __b,
24696 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24697 #else
24698 return __builtin_shuffle (__a, __b,
24699 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24700 #endif
24703 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24704 vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
24706 #ifdef __AARCH64EB__
24707 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24708 #else
24709 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24710 #endif
24713 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24714 vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
24716 #ifdef __AARCH64EB__
24717 return __builtin_shuffle (__a, __b,
24718 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24719 #else
24720 return __builtin_shuffle (__a, __b,
24721 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24722 #endif
24725 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24726 vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
24728 #ifdef __AARCH64EB__
24729 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24730 #else
24731 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24732 #endif
24735 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24736 vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
24738 #ifdef __AARCH64EB__
24739 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24740 #else
24741 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24742 #endif
24745 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24746 vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
24748 #ifdef __AARCH64EB__
24749 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24750 #else
24751 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24752 #endif
24755 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24756 vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
24758 #ifdef __AARCH64EB__
24759 return __builtin_shuffle (__a, __b, (uint8x16_t)
24760 {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24761 #else
24762 return __builtin_shuffle (__a, __b, (uint8x16_t)
24763 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24764 #endif
24767 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24768 vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
24770 #ifdef __AARCH64EB__
24771 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24772 #else
24773 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24774 #endif
24777 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24778 vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
24780 #ifdef __AARCH64EB__
24781 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24782 #else
24783 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24784 #endif
24787 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24788 vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
24790 #ifdef __AARCH64EB__
24791 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24792 #else
24793 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24794 #endif
24797 __INTERLEAVE_LIST (uzp)
24799 /* vzip */
24801 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24802 vzip1_f32 (float32x2_t __a, float32x2_t __b)
24804 #ifdef __AARCH64EB__
24805 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24806 #else
24807 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24808 #endif
24811 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24812 vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
24814 #ifdef __AARCH64EB__
24815 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24816 #else
24817 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24818 #endif
24821 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24822 vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
24824 #ifdef __AARCH64EB__
24825 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24826 #else
24827 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24828 #endif
24831 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24832 vzip1_s8 (int8x8_t __a, int8x8_t __b)
24834 #ifdef __AARCH64EB__
24835 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24836 #else
24837 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24838 #endif
24841 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24842 vzip1_s16 (int16x4_t __a, int16x4_t __b)
24844 #ifdef __AARCH64EB__
24845 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24846 #else
24847 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24848 #endif
24851 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24852 vzip1_s32 (int32x2_t __a, int32x2_t __b)
24854 #ifdef __AARCH64EB__
24855 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24856 #else
24857 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24858 #endif
24861 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24862 vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
24864 #ifdef __AARCH64EB__
24865 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24866 #else
24867 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24868 #endif
24871 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24872 vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
24874 #ifdef __AARCH64EB__
24875 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24876 #else
24877 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24878 #endif
24881 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24882 vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
24884 #ifdef __AARCH64EB__
24885 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24886 #else
24887 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24888 #endif
24891 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24892 vzip1q_f32 (float32x4_t __a, float32x4_t __b)
24894 #ifdef __AARCH64EB__
24895 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24896 #else
24897 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
24898 #endif
24901 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24902 vzip1q_f64 (float64x2_t __a, float64x2_t __b)
24904 #ifdef __AARCH64EB__
24905 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24906 #else
24907 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24908 #endif
24911 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24912 vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
24914 #ifdef __AARCH64EB__
24915 return __builtin_shuffle (__a, __b, (uint8x16_t)
24916 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24917 #else
24918 return __builtin_shuffle (__a, __b, (uint8x16_t)
24919 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24920 #endif
24923 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24924 vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
24926 #ifdef __AARCH64EB__
24927 return __builtin_shuffle (__a, __b, (uint16x8_t)
24928 {12, 4, 13, 5, 14, 6, 15, 7});
24929 #else
24930 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24931 #endif
24934 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24935 vzip1q_s8 (int8x16_t __a, int8x16_t __b)
24937 #ifdef __AARCH64EB__
24938 return __builtin_shuffle (__a, __b, (uint8x16_t)
24939 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24940 #else
24941 return __builtin_shuffle (__a, __b, (uint8x16_t)
24942 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24943 #endif
24946 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24947 vzip1q_s16 (int16x8_t __a, int16x8_t __b)
24949 #ifdef __AARCH64EB__
24950 return __builtin_shuffle (__a, __b, (uint16x8_t)
24951 {12, 4, 13, 5, 14, 6, 15, 7});
24952 #else
24953 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24954 #endif
24957 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24958 vzip1q_s32 (int32x4_t __a, int32x4_t __b)
24960 #ifdef __AARCH64EB__
24961 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24962 #else
24963 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
24964 #endif
24967 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24968 vzip1q_s64 (int64x2_t __a, int64x2_t __b)
24970 #ifdef __AARCH64EB__
24971 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24972 #else
24973 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24974 #endif
24977 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24978 vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
24980 #ifdef __AARCH64EB__
24981 return __builtin_shuffle (__a, __b, (uint8x16_t)
24982 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24983 #else
24984 return __builtin_shuffle (__a, __b, (uint8x16_t)
24985 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24986 #endif
24989 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24990 vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
24992 #ifdef __AARCH64EB__
24993 return __builtin_shuffle (__a, __b, (uint16x8_t)
24994 {12, 4, 13, 5, 14, 6, 15, 7});
24995 #else
24996 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24997 #endif
25000 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25001 vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
25003 #ifdef __AARCH64EB__
25004 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25005 #else
25006 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25007 #endif
25010 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25011 vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
25013 #ifdef __AARCH64EB__
25014 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25015 #else
25016 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25017 #endif
25020 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25021 vzip2_f32 (float32x2_t __a, float32x2_t __b)
25023 #ifdef __AARCH64EB__
25024 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25025 #else
25026 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25027 #endif
25030 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25031 vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
25033 #ifdef __AARCH64EB__
25034 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25035 #else
25036 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25037 #endif
25040 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25041 vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
25043 #ifdef __AARCH64EB__
25044 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25045 #else
25046 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25047 #endif
25050 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25051 vzip2_s8 (int8x8_t __a, int8x8_t __b)
25053 #ifdef __AARCH64EB__
25054 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25055 #else
25056 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25057 #endif
25060 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25061 vzip2_s16 (int16x4_t __a, int16x4_t __b)
25063 #ifdef __AARCH64EB__
25064 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25065 #else
25066 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25067 #endif
25070 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25071 vzip2_s32 (int32x2_t __a, int32x2_t __b)
25073 #ifdef __AARCH64EB__
25074 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25075 #else
25076 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25077 #endif
25080 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25081 vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
25083 #ifdef __AARCH64EB__
25084 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25085 #else
25086 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25087 #endif
25090 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25091 vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
25093 #ifdef __AARCH64EB__
25094 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25095 #else
25096 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25097 #endif
25100 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25101 vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
25103 #ifdef __AARCH64EB__
25104 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25105 #else
25106 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25107 #endif
25110 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25111 vzip2q_f32 (float32x4_t __a, float32x4_t __b)
25113 #ifdef __AARCH64EB__
25114 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25115 #else
25116 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25117 #endif
25120 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25121 vzip2q_f64 (float64x2_t __a, float64x2_t __b)
25123 #ifdef __AARCH64EB__
25124 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25125 #else
25126 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25127 #endif
25130 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25131 vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
25133 #ifdef __AARCH64EB__
25134 return __builtin_shuffle (__a, __b, (uint8x16_t)
25135 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25136 #else
25137 return __builtin_shuffle (__a, __b, (uint8x16_t)
25138 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25139 #endif
25142 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25143 vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
25145 #ifdef __AARCH64EB__
25146 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25147 #else
25148 return __builtin_shuffle (__a, __b, (uint16x8_t)
25149 {4, 12, 5, 13, 6, 14, 7, 15});
25150 #endif
25153 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25154 vzip2q_s8 (int8x16_t __a, int8x16_t __b)
25156 #ifdef __AARCH64EB__
25157 return __builtin_shuffle (__a, __b, (uint8x16_t)
25158 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25159 #else
25160 return __builtin_shuffle (__a, __b, (uint8x16_t)
25161 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25162 #endif
25165 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25166 vzip2q_s16 (int16x8_t __a, int16x8_t __b)
25168 #ifdef __AARCH64EB__
25169 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25170 #else
25171 return __builtin_shuffle (__a, __b, (uint16x8_t)
25172 {4, 12, 5, 13, 6, 14, 7, 15});
25173 #endif
25176 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25177 vzip2q_s32 (int32x4_t __a, int32x4_t __b)
25179 #ifdef __AARCH64EB__
25180 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25181 #else
25182 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25183 #endif
25186 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25187 vzip2q_s64 (int64x2_t __a, int64x2_t __b)
25189 #ifdef __AARCH64EB__
25190 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25191 #else
25192 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25193 #endif
25196 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25197 vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
25199 #ifdef __AARCH64EB__
25200 return __builtin_shuffle (__a, __b, (uint8x16_t)
25201 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25202 #else
25203 return __builtin_shuffle (__a, __b, (uint8x16_t)
25204 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25205 #endif
25208 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25209 vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
25211 #ifdef __AARCH64EB__
25212 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25213 #else
25214 return __builtin_shuffle (__a, __b, (uint16x8_t)
25215 {4, 12, 5, 13, 6, 14, 7, 15});
25216 #endif
25219 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25220 vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
25222 #ifdef __AARCH64EB__
25223 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25224 #else
25225 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25226 #endif
25229 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25230 vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
25232 #ifdef __AARCH64EB__
25233 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25234 #else
25235 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25236 #endif
25239 __INTERLEAVE_LIST (zip)
25241 #undef __INTERLEAVE_LIST
25242 #undef __DEFINTERLEAVE
25244 /* End of optimal implementations in approved order. */
25246 #undef __aarch64_vget_lane_any
25247 #undef __aarch64_vget_lane_f32
25248 #undef __aarch64_vget_lane_f64
25249 #undef __aarch64_vget_lane_p8
25250 #undef __aarch64_vget_lane_p16
25251 #undef __aarch64_vget_lane_s8
25252 #undef __aarch64_vget_lane_s16
25253 #undef __aarch64_vget_lane_s32
25254 #undef __aarch64_vget_lane_s64
25255 #undef __aarch64_vget_lane_u8
25256 #undef __aarch64_vget_lane_u16
25257 #undef __aarch64_vget_lane_u32
25258 #undef __aarch64_vget_lane_u64
25260 #undef __aarch64_vgetq_lane_f32
25261 #undef __aarch64_vgetq_lane_f64
25262 #undef __aarch64_vgetq_lane_p8
25263 #undef __aarch64_vgetq_lane_p16
25264 #undef __aarch64_vgetq_lane_s8
25265 #undef __aarch64_vgetq_lane_s16
25266 #undef __aarch64_vgetq_lane_s32
25267 #undef __aarch64_vgetq_lane_s64
25268 #undef __aarch64_vgetq_lane_u8
25269 #undef __aarch64_vgetq_lane_u16
25270 #undef __aarch64_vgetq_lane_u32
25271 #undef __aarch64_vgetq_lane_u64
25273 #undef __aarch64_vdup_lane_any
25274 #undef __aarch64_vdup_lane_f32
25275 #undef __aarch64_vdup_lane_f64
25276 #undef __aarch64_vdup_lane_p8
25277 #undef __aarch64_vdup_lane_p16
25278 #undef __aarch64_vdup_lane_s8
25279 #undef __aarch64_vdup_lane_s16
25280 #undef __aarch64_vdup_lane_s32
25281 #undef __aarch64_vdup_lane_s64
25282 #undef __aarch64_vdup_lane_u8
25283 #undef __aarch64_vdup_lane_u16
25284 #undef __aarch64_vdup_lane_u32
25285 #undef __aarch64_vdup_lane_u64
25286 #undef __aarch64_vdup_laneq_f32
25287 #undef __aarch64_vdup_laneq_f64
25288 #undef __aarch64_vdup_laneq_p8
25289 #undef __aarch64_vdup_laneq_p16
25290 #undef __aarch64_vdup_laneq_s8
25291 #undef __aarch64_vdup_laneq_s16
25292 #undef __aarch64_vdup_laneq_s32
25293 #undef __aarch64_vdup_laneq_s64
25294 #undef __aarch64_vdup_laneq_u8
25295 #undef __aarch64_vdup_laneq_u16
25296 #undef __aarch64_vdup_laneq_u32
25297 #undef __aarch64_vdup_laneq_u64
25298 #undef __aarch64_vdupq_lane_f32
25299 #undef __aarch64_vdupq_lane_f64
25300 #undef __aarch64_vdupq_lane_p8
25301 #undef __aarch64_vdupq_lane_p16
25302 #undef __aarch64_vdupq_lane_s8
25303 #undef __aarch64_vdupq_lane_s16
25304 #undef __aarch64_vdupq_lane_s32
25305 #undef __aarch64_vdupq_lane_s64
25306 #undef __aarch64_vdupq_lane_u8
25307 #undef __aarch64_vdupq_lane_u16
25308 #undef __aarch64_vdupq_lane_u32
25309 #undef __aarch64_vdupq_lane_u64
25310 #undef __aarch64_vdupq_laneq_f32
25311 #undef __aarch64_vdupq_laneq_f64
25312 #undef __aarch64_vdupq_laneq_p8
25313 #undef __aarch64_vdupq_laneq_p16
25314 #undef __aarch64_vdupq_laneq_s8
25315 #undef __aarch64_vdupq_laneq_s16
25316 #undef __aarch64_vdupq_laneq_s32
25317 #undef __aarch64_vdupq_laneq_s64
25318 #undef __aarch64_vdupq_laneq_u8
25319 #undef __aarch64_vdupq_laneq_u16
25320 #undef __aarch64_vdupq_laneq_u32
25321 #undef __aarch64_vdupq_laneq_u64
25323 #endif