/* [AArch64] Implement vmul<q>_lane<q>_<fsu><16,32,64> intrinsics in C.
   Source: official-gcc.git / gcc / config / aarch64 / arm_neon.h
   blob 6c9dd79a69508139bfb09631941396aa042a05cc.  */
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef int64_t int64x1_t;
42 typedef int32_t int32x1_t;
43 typedef int16_t int16x1_t;
44 typedef int8_t int8x1_t;
45 typedef double float64x1_t;
46 typedef __builtin_aarch64_simd_sf float32x2_t
47 __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_aarch64_simd_poly8 poly8x8_t
49 __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_aarch64_simd_poly16 poly16x4_t
51 __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_aarch64_simd_uqi uint8x8_t
53 __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_aarch64_simd_uhi uint16x4_t
55 __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_aarch64_simd_usi uint32x2_t
57 __attribute__ ((__vector_size__ (8)));
58 typedef uint64_t uint64x1_t;
59 typedef uint32_t uint32x1_t;
60 typedef uint16_t uint16x1_t;
61 typedef uint8_t uint8x1_t;
62 typedef __builtin_aarch64_simd_qi int8x16_t
63 __attribute__ ((__vector_size__ (16)));
64 typedef __builtin_aarch64_simd_hi int16x8_t
65 __attribute__ ((__vector_size__ (16)));
66 typedef __builtin_aarch64_simd_si int32x4_t
67 __attribute__ ((__vector_size__ (16)));
68 typedef __builtin_aarch64_simd_di int64x2_t
69 __attribute__ ((__vector_size__ (16)));
70 typedef __builtin_aarch64_simd_sf float32x4_t
71 __attribute__ ((__vector_size__ (16)));
72 typedef __builtin_aarch64_simd_df float64x2_t
73 __attribute__ ((__vector_size__ (16)));
74 typedef __builtin_aarch64_simd_poly8 poly8x16_t
75 __attribute__ ((__vector_size__ (16)));
76 typedef __builtin_aarch64_simd_poly16 poly16x8_t
77 __attribute__ ((__vector_size__ (16)));
78 typedef __builtin_aarch64_simd_uqi uint8x16_t
79 __attribute__ ((__vector_size__ (16)));
80 typedef __builtin_aarch64_simd_uhi uint16x8_t
81 __attribute__ ((__vector_size__ (16)));
82 typedef __builtin_aarch64_simd_usi uint32x4_t
83 __attribute__ ((__vector_size__ (16)));
84 typedef __builtin_aarch64_simd_udi uint64x2_t
85 __attribute__ ((__vector_size__ (16)));
87 typedef float float32_t;
88 typedef double float64_t;
89 typedef __builtin_aarch64_simd_poly8 poly8_t;
90 typedef __builtin_aarch64_simd_poly16 poly16_t;
92 typedef struct int8x8x2_t
94 int8x8_t val[2];
95 } int8x8x2_t;
97 typedef struct int8x16x2_t
99 int8x16_t val[2];
100 } int8x16x2_t;
102 typedef struct int16x4x2_t
104 int16x4_t val[2];
105 } int16x4x2_t;
107 typedef struct int16x8x2_t
109 int16x8_t val[2];
110 } int16x8x2_t;
112 typedef struct int32x2x2_t
114 int32x2_t val[2];
115 } int32x2x2_t;
117 typedef struct int32x4x2_t
119 int32x4_t val[2];
120 } int32x4x2_t;
122 typedef struct int64x1x2_t
124 int64x1_t val[2];
125 } int64x1x2_t;
127 typedef struct int64x2x2_t
129 int64x2_t val[2];
130 } int64x2x2_t;
132 typedef struct uint8x8x2_t
134 uint8x8_t val[2];
135 } uint8x8x2_t;
137 typedef struct uint8x16x2_t
139 uint8x16_t val[2];
140 } uint8x16x2_t;
142 typedef struct uint16x4x2_t
144 uint16x4_t val[2];
145 } uint16x4x2_t;
147 typedef struct uint16x8x2_t
149 uint16x8_t val[2];
150 } uint16x8x2_t;
152 typedef struct uint32x2x2_t
154 uint32x2_t val[2];
155 } uint32x2x2_t;
157 typedef struct uint32x4x2_t
159 uint32x4_t val[2];
160 } uint32x4x2_t;
162 typedef struct uint64x1x2_t
164 uint64x1_t val[2];
165 } uint64x1x2_t;
167 typedef struct uint64x2x2_t
169 uint64x2_t val[2];
170 } uint64x2x2_t;
172 typedef struct float32x2x2_t
174 float32x2_t val[2];
175 } float32x2x2_t;
177 typedef struct float32x4x2_t
179 float32x4_t val[2];
180 } float32x4x2_t;
182 typedef struct float64x2x2_t
184 float64x2_t val[2];
185 } float64x2x2_t;
187 typedef struct float64x1x2_t
189 float64x1_t val[2];
190 } float64x1x2_t;
192 typedef struct poly8x8x2_t
194 poly8x8_t val[2];
195 } poly8x8x2_t;
197 typedef struct poly8x16x2_t
199 poly8x16_t val[2];
200 } poly8x16x2_t;
202 typedef struct poly16x4x2_t
204 poly16x4_t val[2];
205 } poly16x4x2_t;
207 typedef struct poly16x8x2_t
209 poly16x8_t val[2];
210 } poly16x8x2_t;
212 typedef struct int8x8x3_t
214 int8x8_t val[3];
215 } int8x8x3_t;
217 typedef struct int8x16x3_t
219 int8x16_t val[3];
220 } int8x16x3_t;
222 typedef struct int16x4x3_t
224 int16x4_t val[3];
225 } int16x4x3_t;
227 typedef struct int16x8x3_t
229 int16x8_t val[3];
230 } int16x8x3_t;
232 typedef struct int32x2x3_t
234 int32x2_t val[3];
235 } int32x2x3_t;
237 typedef struct int32x4x3_t
239 int32x4_t val[3];
240 } int32x4x3_t;
242 typedef struct int64x1x3_t
244 int64x1_t val[3];
245 } int64x1x3_t;
247 typedef struct int64x2x3_t
249 int64x2_t val[3];
250 } int64x2x3_t;
252 typedef struct uint8x8x3_t
254 uint8x8_t val[3];
255 } uint8x8x3_t;
257 typedef struct uint8x16x3_t
259 uint8x16_t val[3];
260 } uint8x16x3_t;
262 typedef struct uint16x4x3_t
264 uint16x4_t val[3];
265 } uint16x4x3_t;
267 typedef struct uint16x8x3_t
269 uint16x8_t val[3];
270 } uint16x8x3_t;
272 typedef struct uint32x2x3_t
274 uint32x2_t val[3];
275 } uint32x2x3_t;
277 typedef struct uint32x4x3_t
279 uint32x4_t val[3];
280 } uint32x4x3_t;
282 typedef struct uint64x1x3_t
284 uint64x1_t val[3];
285 } uint64x1x3_t;
287 typedef struct uint64x2x3_t
289 uint64x2_t val[3];
290 } uint64x2x3_t;
292 typedef struct float32x2x3_t
294 float32x2_t val[3];
295 } float32x2x3_t;
297 typedef struct float32x4x3_t
299 float32x4_t val[3];
300 } float32x4x3_t;
302 typedef struct float64x2x3_t
304 float64x2_t val[3];
305 } float64x2x3_t;
307 typedef struct float64x1x3_t
309 float64x1_t val[3];
310 } float64x1x3_t;
312 typedef struct poly8x8x3_t
314 poly8x8_t val[3];
315 } poly8x8x3_t;
317 typedef struct poly8x16x3_t
319 poly8x16_t val[3];
320 } poly8x16x3_t;
322 typedef struct poly16x4x3_t
324 poly16x4_t val[3];
325 } poly16x4x3_t;
327 typedef struct poly16x8x3_t
329 poly16x8_t val[3];
330 } poly16x8x3_t;
332 typedef struct int8x8x4_t
334 int8x8_t val[4];
335 } int8x8x4_t;
337 typedef struct int8x16x4_t
339 int8x16_t val[4];
340 } int8x16x4_t;
342 typedef struct int16x4x4_t
344 int16x4_t val[4];
345 } int16x4x4_t;
347 typedef struct int16x8x4_t
349 int16x8_t val[4];
350 } int16x8x4_t;
352 typedef struct int32x2x4_t
354 int32x2_t val[4];
355 } int32x2x4_t;
357 typedef struct int32x4x4_t
359 int32x4_t val[4];
360 } int32x4x4_t;
362 typedef struct int64x1x4_t
364 int64x1_t val[4];
365 } int64x1x4_t;
367 typedef struct int64x2x4_t
369 int64x2_t val[4];
370 } int64x2x4_t;
372 typedef struct uint8x8x4_t
374 uint8x8_t val[4];
375 } uint8x8x4_t;
377 typedef struct uint8x16x4_t
379 uint8x16_t val[4];
380 } uint8x16x4_t;
382 typedef struct uint16x4x4_t
384 uint16x4_t val[4];
385 } uint16x4x4_t;
387 typedef struct uint16x8x4_t
389 uint16x8_t val[4];
390 } uint16x8x4_t;
392 typedef struct uint32x2x4_t
394 uint32x2_t val[4];
395 } uint32x2x4_t;
397 typedef struct uint32x4x4_t
399 uint32x4_t val[4];
400 } uint32x4x4_t;
402 typedef struct uint64x1x4_t
404 uint64x1_t val[4];
405 } uint64x1x4_t;
407 typedef struct uint64x2x4_t
409 uint64x2_t val[4];
410 } uint64x2x4_t;
412 typedef struct float32x2x4_t
414 float32x2_t val[4];
415 } float32x2x4_t;
417 typedef struct float32x4x4_t
419 float32x4_t val[4];
420 } float32x4x4_t;
422 typedef struct float64x2x4_t
424 float64x2_t val[4];
425 } float64x2x4_t;
427 typedef struct float64x1x4_t
429 float64x1_t val[4];
430 } float64x1x4_t;
432 typedef struct poly8x8x4_t
434 poly8x8_t val[4];
435 } poly8x8x4_t;
437 typedef struct poly8x16x4_t
439 poly8x16_t val[4];
440 } poly8x16x4_t;
442 typedef struct poly16x4x4_t
444 poly16x4_t val[4];
445 } poly16x4x4_t;
447 typedef struct poly16x8x4_t
449 poly16x8_t val[4];
450 } poly16x8x4_t;
/* vget_lane internal macros.  __aarch64_vget_lane_any extracts lane __b
   from vector __a via the back-end get_lane builtin; __cast_ret/__cast_a
   are optional casts used where the builtin is only defined on the
   signed variant of the type.  The 64x1 variants are plain scalars, so
   the "lane" is the value itself.  */

#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret								\
     __builtin_aarch64_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
#define __aarch64_vget_lane_f64(__a, __b) (__a)

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) (__a)

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) (__a)

#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros.  Duplicate lane __b of vector
   __a across a new vector: __q1 selects the destination width (empty
   for 64-bit, "q" for 128-bit), __q2 the source width.  Built on the
   vget_lane and vdup_n families above.  Scalar (64x1) sources need no
   lane extraction.  */

#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) (__a)
#define __aarch64_vdup_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) (__a)
#define __aarch64_vdup_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) (__a)

/* __aarch64_vdup_laneq internal macros.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , q, __a, __b)

/* __aarch64_vdupq_lane internal macros.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, , __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
#define __aarch64_vdupq_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
#define __aarch64_vdupq_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))

/* __aarch64_vdupq_laneq internal macros.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, q, __a, __b)
612 /* vadd */
613 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
614 vadd_s8 (int8x8_t __a, int8x8_t __b)
616 return __a + __b;
619 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
620 vadd_s16 (int16x4_t __a, int16x4_t __b)
622 return __a + __b;
625 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
626 vadd_s32 (int32x2_t __a, int32x2_t __b)
628 return __a + __b;
631 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
632 vadd_f32 (float32x2_t __a, float32x2_t __b)
634 return __a + __b;
637 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
638 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
640 return __a + __b;
643 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
644 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
646 return __a + __b;
649 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
650 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
652 return __a + __b;
655 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
656 vadd_s64 (int64x1_t __a, int64x1_t __b)
658 return __a + __b;
661 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
662 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
664 return __a + __b;
667 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
668 vaddq_s8 (int8x16_t __a, int8x16_t __b)
670 return __a + __b;
673 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
674 vaddq_s16 (int16x8_t __a, int16x8_t __b)
676 return __a + __b;
679 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
680 vaddq_s32 (int32x4_t __a, int32x4_t __b)
682 return __a + __b;
685 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
686 vaddq_s64 (int64x2_t __a, int64x2_t __b)
688 return __a + __b;
691 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
692 vaddq_f32 (float32x4_t __a, float32x4_t __b)
694 return __a + __b;
697 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
698 vaddq_f64 (float64x2_t __a, float64x2_t __b)
700 return __a + __b;
703 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
704 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
706 return __a + __b;
709 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
710 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
712 return __a + __b;
715 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
716 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
718 return __a + __b;
721 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
722 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
724 return __a + __b;
727 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
728 vaddl_s8 (int8x8_t __a, int8x8_t __b)
730 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
733 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
734 vaddl_s16 (int16x4_t __a, int16x4_t __b)
736 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
739 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
740 vaddl_s32 (int32x2_t __a, int32x2_t __b)
742 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
745 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
746 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
748 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
749 (int8x8_t) __b);
752 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
753 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
755 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
756 (int16x4_t) __b);
759 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
760 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
762 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
763 (int32x2_t) __b);
766 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
767 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
769 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
772 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
773 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
775 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
778 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
779 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
781 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
784 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
785 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
787 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
788 (int8x16_t) __b);
791 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
792 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
794 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
795 (int16x8_t) __b);
798 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
799 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
801 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
802 (int32x4_t) __b);
805 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
806 vaddw_s8 (int16x8_t __a, int8x8_t __b)
808 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
811 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
812 vaddw_s16 (int32x4_t __a, int16x4_t __b)
814 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
817 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
818 vaddw_s32 (int64x2_t __a, int32x2_t __b)
820 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
823 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
824 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
826 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
827 (int8x8_t) __b);
830 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
831 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
833 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
834 (int16x4_t) __b);
837 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
838 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
840 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
841 (int32x2_t) __b);
844 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
845 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
847 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
850 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
851 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
853 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
856 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
857 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
859 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
862 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
863 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
865 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
866 (int8x16_t) __b);
869 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
870 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
872 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
873 (int16x8_t) __b);
876 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
877 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
879 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
880 (int32x4_t) __b);
883 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
884 vhadd_s8 (int8x8_t __a, int8x8_t __b)
886 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
889 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
890 vhadd_s16 (int16x4_t __a, int16x4_t __b)
892 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
895 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
896 vhadd_s32 (int32x2_t __a, int32x2_t __b)
898 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
901 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
902 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
904 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
905 (int8x8_t) __b);
908 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
909 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
911 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
912 (int16x4_t) __b);
915 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
916 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
918 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
919 (int32x2_t) __b);
922 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
923 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
925 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
928 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
929 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
931 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
934 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
935 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
937 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
940 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
941 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
943 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
944 (int8x16_t) __b);
947 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
948 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
950 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
951 (int16x8_t) __b);
954 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
955 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
957 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
958 (int32x4_t) __b);
961 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
962 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
964 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
967 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
968 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
970 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
973 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
974 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
976 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
979 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
980 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
982 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
983 (int8x8_t) __b);
986 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
987 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
989 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
990 (int16x4_t) __b);
993 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
994 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
996 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
997 (int32x2_t) __b);
1000 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1001 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1003 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1006 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1007 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1009 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1012 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1013 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1015 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1018 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1019 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1021 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1022 (int8x16_t) __b);
1025 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1026 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1028 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1029 (int16x8_t) __b);
1032 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1033 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1035 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1036 (int32x4_t) __b);
1039 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1040 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1042 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1045 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1046 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1048 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1051 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1052 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1054 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1057 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1058 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1060 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1061 (int16x8_t) __b);
1064 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1065 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1067 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1068 (int32x4_t) __b);
1071 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1072 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1074 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1075 (int64x2_t) __b);
1078 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1079 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1081 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1084 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1085 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1087 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1090 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1091 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1093 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1096 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1097 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1099 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1100 (int16x8_t) __b);
1103 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1104 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1106 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1107 (int32x4_t) __b);
1110 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1111 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1113 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1114 (int64x2_t) __b);
1117 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1118 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1120 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1123 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1124 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1126 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1129 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1130 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1132 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1135 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1136 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1138 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1139 (int16x8_t) __b,
1140 (int16x8_t) __c);
1143 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1144 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1146 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1147 (int32x4_t) __b,
1148 (int32x4_t) __c);
1151 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1152 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1154 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1155 (int64x2_t) __b,
1156 (int64x2_t) __c);
1159 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1160 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1162 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1165 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1166 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1168 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1171 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1172 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1174 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1177 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1178 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1180 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1181 (int16x8_t) __b,
1182 (int16x8_t) __c);
1185 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1186 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1188 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1189 (int32x4_t) __b,
1190 (int32x4_t) __c);
1193 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1194 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1196 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1197 (int64x2_t) __b,
1198 (int64x2_t) __c);
1201 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1202 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1204 return __a / __b;
1207 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1208 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1210 return __a / __b;
1213 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1214 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1216 return __a / __b;
1219 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1220 vmul_s8 (int8x8_t __a, int8x8_t __b)
1222 return __a * __b;
1225 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1226 vmul_s16 (int16x4_t __a, int16x4_t __b)
1228 return __a * __b;
1231 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1232 vmul_s32 (int32x2_t __a, int32x2_t __b)
1234 return __a * __b;
1237 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1238 vmul_f32 (float32x2_t __a, float32x2_t __b)
1240 return __a * __b;
1243 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1244 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1246 return __a * __b;
1249 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1250 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1252 return __a * __b;
1255 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1256 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1258 return __a * __b;
1261 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1262 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1264 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1265 (int8x8_t) __b);
1268 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1269 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1271 return __a * __b;
1274 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1275 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1277 return __a * __b;
1280 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1281 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1283 return __a * __b;
1286 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1287 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1289 return __a * __b;
1292 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1293 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1295 return __a * __b;
1298 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1299 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1301 return __a * __b;
1304 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1305 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1307 return __a * __b;
1310 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1311 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1313 return __a * __b;
1316 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1317 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1319 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1320 (int8x16_t) __b);
1323 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1324 vand_s8 (int8x8_t __a, int8x8_t __b)
1326 return __a & __b;
1329 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1330 vand_s16 (int16x4_t __a, int16x4_t __b)
1332 return __a & __b;
1335 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1336 vand_s32 (int32x2_t __a, int32x2_t __b)
1338 return __a & __b;
1341 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1342 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1344 return __a & __b;
1347 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1348 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1350 return __a & __b;
1353 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1354 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1356 return __a & __b;
1359 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1360 vand_s64 (int64x1_t __a, int64x1_t __b)
1362 return __a & __b;
1365 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1366 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1368 return __a & __b;
1371 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1372 vandq_s8 (int8x16_t __a, int8x16_t __b)
1374 return __a & __b;
1377 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1378 vandq_s16 (int16x8_t __a, int16x8_t __b)
1380 return __a & __b;
1383 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1384 vandq_s32 (int32x4_t __a, int32x4_t __b)
1386 return __a & __b;
1389 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1390 vandq_s64 (int64x2_t __a, int64x2_t __b)
1392 return __a & __b;
1395 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1396 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1398 return __a & __b;
1401 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1402 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1404 return __a & __b;
1407 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1408 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1410 return __a & __b;
1413 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1414 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1416 return __a & __b;
1419 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1420 vorr_s8 (int8x8_t __a, int8x8_t __b)
1422 return __a | __b;
1425 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1426 vorr_s16 (int16x4_t __a, int16x4_t __b)
1428 return __a | __b;
1431 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1432 vorr_s32 (int32x2_t __a, int32x2_t __b)
1434 return __a | __b;
1437 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1438 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1440 return __a | __b;
1443 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1444 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1446 return __a | __b;
1449 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1450 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1452 return __a | __b;
1455 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1456 vorr_s64 (int64x1_t __a, int64x1_t __b)
1458 return __a | __b;
1461 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1462 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1464 return __a | __b;
1467 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1468 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1470 return __a | __b;
1473 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1474 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1476 return __a | __b;
1479 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1480 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1482 return __a | __b;
1485 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1486 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1488 return __a | __b;
1491 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1492 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1494 return __a | __b;
1497 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1498 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1500 return __a | __b;
1503 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1504 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1506 return __a | __b;
1509 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1510 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1512 return __a | __b;
1515 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1516 veor_s8 (int8x8_t __a, int8x8_t __b)
1518 return __a ^ __b;
1521 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1522 veor_s16 (int16x4_t __a, int16x4_t __b)
1524 return __a ^ __b;
1527 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1528 veor_s32 (int32x2_t __a, int32x2_t __b)
1530 return __a ^ __b;
1533 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1534 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1536 return __a ^ __b;
1539 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1540 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1542 return __a ^ __b;
1545 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1546 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1548 return __a ^ __b;
1551 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1552 veor_s64 (int64x1_t __a, int64x1_t __b)
1554 return __a ^ __b;
1557 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1558 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1560 return __a ^ __b;
1563 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1564 veorq_s8 (int8x16_t __a, int8x16_t __b)
1566 return __a ^ __b;
1569 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1570 veorq_s16 (int16x8_t __a, int16x8_t __b)
1572 return __a ^ __b;
1575 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1576 veorq_s32 (int32x4_t __a, int32x4_t __b)
1578 return __a ^ __b;
1581 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1582 veorq_s64 (int64x2_t __a, int64x2_t __b)
1584 return __a ^ __b;
1587 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1588 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1590 return __a ^ __b;
1593 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1594 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1596 return __a ^ __b;
1599 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1600 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1602 return __a ^ __b;
1605 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1606 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1608 return __a ^ __b;
1611 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1612 vbic_s8 (int8x8_t __a, int8x8_t __b)
1614 return __a & ~__b;
1617 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1618 vbic_s16 (int16x4_t __a, int16x4_t __b)
1620 return __a & ~__b;
1623 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1624 vbic_s32 (int32x2_t __a, int32x2_t __b)
1626 return __a & ~__b;
1629 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1630 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1632 return __a & ~__b;
1635 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1636 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1638 return __a & ~__b;
1641 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1642 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1644 return __a & ~__b;
1647 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1648 vbic_s64 (int64x1_t __a, int64x1_t __b)
1650 return __a & ~__b;
1653 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1654 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1656 return __a & ~__b;
1659 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1660 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1662 return __a & ~__b;
1665 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1666 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1668 return __a & ~__b;
1671 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1672 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1674 return __a & ~__b;
1677 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1678 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1680 return __a & ~__b;
1683 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1684 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1686 return __a & ~__b;
1689 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1690 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1692 return __a & ~__b;
1695 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1696 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1698 return __a & ~__b;
1701 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1702 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1704 return __a & ~__b;
1707 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1708 vorn_s8 (int8x8_t __a, int8x8_t __b)
1710 return __a | ~__b;
1713 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1714 vorn_s16 (int16x4_t __a, int16x4_t __b)
1716 return __a | ~__b;
1719 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1720 vorn_s32 (int32x2_t __a, int32x2_t __b)
1722 return __a | ~__b;
1725 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1726 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1728 return __a | ~__b;
1731 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1732 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1734 return __a | ~__b;
1737 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1738 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1740 return __a | ~__b;
1743 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1744 vorn_s64 (int64x1_t __a, int64x1_t __b)
1746 return __a | ~__b;
1749 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1750 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1752 return __a | ~__b;
1755 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1756 vornq_s8 (int8x16_t __a, int8x16_t __b)
1758 return __a | ~__b;
1761 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1762 vornq_s16 (int16x8_t __a, int16x8_t __b)
1764 return __a | ~__b;
1767 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1768 vornq_s32 (int32x4_t __a, int32x4_t __b)
1770 return __a | ~__b;
1773 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1774 vornq_s64 (int64x2_t __a, int64x2_t __b)
1776 return __a | ~__b;
1779 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1780 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1782 return __a | ~__b;
1785 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1786 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1788 return __a | ~__b;
1791 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1792 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1794 return __a | ~__b;
1797 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1798 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1800 return __a | ~__b;
1803 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1804 vsub_s8 (int8x8_t __a, int8x8_t __b)
1806 return __a - __b;
1809 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1810 vsub_s16 (int16x4_t __a, int16x4_t __b)
1812 return __a - __b;
1815 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1816 vsub_s32 (int32x2_t __a, int32x2_t __b)
1818 return __a - __b;
1821 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1822 vsub_f32 (float32x2_t __a, float32x2_t __b)
1824 return __a - __b;
1827 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1828 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1830 return __a - __b;
1833 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1834 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1836 return __a - __b;
1839 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1840 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1842 return __a - __b;
1845 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1846 vsub_s64 (int64x1_t __a, int64x1_t __b)
1848 return __a - __b;
1851 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1852 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1854 return __a - __b;
1857 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1858 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1860 return __a - __b;
1863 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1864 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1866 return __a - __b;
1869 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1870 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1872 return __a - __b;
1875 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1876 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1878 return __a - __b;
1881 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1882 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1884 return __a - __b;
1887 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1888 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1890 return __a - __b;
1893 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1894 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1896 return __a - __b;
1899 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1900 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1902 return __a - __b;
1905 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1906 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1908 return __a - __b;
1911 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1912 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1914 return __a - __b;
1917 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1918 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1920 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1923 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1924 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1926 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1929 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1930 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1932 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1935 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1936 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1938 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1939 (int8x8_t) __b);
1942 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1943 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1945 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1946 (int16x4_t) __b);
1949 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1950 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1952 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1953 (int32x2_t) __b);
1956 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1957 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1959 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1962 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1963 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1965 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1968 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1969 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1971 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1974 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1975 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1977 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1978 (int8x16_t) __b);
1981 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1982 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
1984 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
1985 (int16x8_t) __b);
1988 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1989 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
1991 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
1992 (int32x4_t) __b);
1995 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1996 vsubw_s8 (int16x8_t __a, int8x8_t __b)
1998 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2001 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2002 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2004 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2007 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2008 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2010 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2013 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2014 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2016 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2017 (int8x8_t) __b);
2020 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2021 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2023 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2024 (int16x4_t) __b);
2027 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2028 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2030 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2031 (int32x2_t) __b);
2034 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2035 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2037 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2040 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2041 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2043 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2046 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2047 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2049 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2052 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2053 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2055 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2056 (int8x16_t) __b);
2059 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2060 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2062 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2063 (int16x8_t) __b);
2066 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2067 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2069 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2070 (int32x4_t) __b);
2073 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2074 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2076 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2079 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2080 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2082 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2085 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2086 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2088 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2091 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2092 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2094 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
2097 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2098 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2100 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
2101 (int8x8_t) __b);
2104 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2105 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2107 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
2108 (int16x4_t) __b);
2111 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2112 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2114 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
2115 (int32x2_t) __b);
2118 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2119 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2121 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
2122 (int64x1_t) __b);
2125 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2126 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2128 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2131 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2132 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2134 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2137 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2138 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2140 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2143 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2144 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2146 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2149 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2150 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2152 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
2153 (int8x16_t) __b);
2156 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2157 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2159 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
2160 (int16x8_t) __b);
2163 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2164 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2166 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2167 (int32x4_t) __b);
2170 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2171 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2173 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2174 (int64x2_t) __b);
2177 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2178 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2180 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2183 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2184 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2186 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2189 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2190 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2192 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2195 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2196 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2198 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2201 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2202 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2204 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2205 (int8x8_t) __b);
2208 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2209 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2211 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2212 (int16x4_t) __b);
2215 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2216 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2218 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2219 (int32x2_t) __b);
2222 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2223 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2225 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2226 (int64x1_t) __b);
2229 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2230 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2232 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2235 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2236 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2238 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2241 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2242 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2244 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2247 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2248 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2250 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2253 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2254 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2256 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2257 (int8x16_t) __b);
2260 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2261 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2263 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2264 (int16x8_t) __b);
2267 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2268 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2270 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2271 (int32x4_t) __b);
2274 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2275 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2277 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2278 (int64x2_t) __b);
2281 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2282 vqneg_s8 (int8x8_t __a)
2284 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2287 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2288 vqneg_s16 (int16x4_t __a)
2290 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2293 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2294 vqneg_s32 (int32x2_t __a)
2296 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2299 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2300 vqnegq_s8 (int8x16_t __a)
2302 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2305 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2306 vqnegq_s16 (int16x8_t __a)
2308 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2311 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2312 vqnegq_s32 (int32x4_t __a)
2314 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2317 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2318 vqabs_s8 (int8x8_t __a)
2320 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2323 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2324 vqabs_s16 (int16x4_t __a)
2326 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2329 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2330 vqabs_s32 (int32x2_t __a)
2332 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2335 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2336 vqabsq_s8 (int8x16_t __a)
2338 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2341 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2342 vqabsq_s16 (int16x8_t __a)
2344 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2347 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2348 vqabsq_s32 (int32x4_t __a)
2350 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2353 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2354 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2356 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2359 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2360 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2362 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2365 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2366 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2368 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2371 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2372 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2374 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2377 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2378 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2380 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2383 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2384 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2386 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2389 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2390 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2392 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2395 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2396 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2398 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2401 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2402 vcreate_s8 (uint64_t __a)
2404 return (int8x8_t) __a;
2407 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2408 vcreate_s16 (uint64_t __a)
2410 return (int16x4_t) __a;
2413 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2414 vcreate_s32 (uint64_t __a)
2416 return (int32x2_t) __a;
2419 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2420 vcreate_s64 (uint64_t __a)
2422 return (int64x1_t) __a;
2425 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2426 vcreate_f32 (uint64_t __a)
2428 return (float32x2_t) __a;
2431 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2432 vcreate_u8 (uint64_t __a)
2434 return (uint8x8_t) __a;
2437 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2438 vcreate_u16 (uint64_t __a)
2440 return (uint16x4_t) __a;
2443 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2444 vcreate_u32 (uint64_t __a)
2446 return (uint32x2_t) __a;
2449 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2450 vcreate_u64 (uint64_t __a)
2452 return (uint64x1_t) __a;
2455 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2456 vcreate_f64 (uint64_t __a)
2458 return (float64x1_t) __builtin_aarch64_createdf (__a);
2461 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2462 vcreate_p8 (uint64_t __a)
2464 return (poly8x8_t) __a;
2467 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2468 vcreate_p16 (uint64_t __a)
2470 return (poly16x4_t) __a;
2473 /* vget_lane */
2475 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2476 vget_lane_f32 (float32x2_t __a, const int __b)
2478 return __aarch64_vget_lane_f32 (__a, __b);
2481 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2482 vget_lane_f64 (float64x1_t __a, const int __b)
2484 return __aarch64_vget_lane_f64 (__a, __b);
2487 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2488 vget_lane_p8 (poly8x8_t __a, const int __b)
2490 return __aarch64_vget_lane_p8 (__a, __b);
2493 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2494 vget_lane_p16 (poly16x4_t __a, const int __b)
2496 return __aarch64_vget_lane_p16 (__a, __b);
2499 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2500 vget_lane_s8 (int8x8_t __a, const int __b)
2502 return __aarch64_vget_lane_s8 (__a, __b);
2505 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2506 vget_lane_s16 (int16x4_t __a, const int __b)
2508 return __aarch64_vget_lane_s16 (__a, __b);
2511 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2512 vget_lane_s32 (int32x2_t __a, const int __b)
2514 return __aarch64_vget_lane_s32 (__a, __b);
2517 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2518 vget_lane_s64 (int64x1_t __a, const int __b)
2520 return __aarch64_vget_lane_s64 (__a, __b);
2523 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2524 vget_lane_u8 (uint8x8_t __a, const int __b)
2526 return __aarch64_vget_lane_u8 (__a, __b);
2529 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2530 vget_lane_u16 (uint16x4_t __a, const int __b)
2532 return __aarch64_vget_lane_u16 (__a, __b);
2535 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2536 vget_lane_u32 (uint32x2_t __a, const int __b)
2538 return __aarch64_vget_lane_u32 (__a, __b);
2541 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2542 vget_lane_u64 (uint64x1_t __a, const int __b)
2544 return __aarch64_vget_lane_u64 (__a, __b);
2547 /* vgetq_lane */
2549 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2550 vgetq_lane_f32 (float32x4_t __a, const int __b)
2552 return __aarch64_vgetq_lane_f32 (__a, __b);
2555 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2556 vgetq_lane_f64 (float64x2_t __a, const int __b)
2558 return __aarch64_vgetq_lane_f64 (__a, __b);
2561 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2562 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2564 return __aarch64_vgetq_lane_p8 (__a, __b);
2567 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2568 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2570 return __aarch64_vgetq_lane_p16 (__a, __b);
2573 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2574 vgetq_lane_s8 (int8x16_t __a, const int __b)
2576 return __aarch64_vgetq_lane_s8 (__a, __b);
2579 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2580 vgetq_lane_s16 (int16x8_t __a, const int __b)
2582 return __aarch64_vgetq_lane_s16 (__a, __b);
2585 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2586 vgetq_lane_s32 (int32x4_t __a, const int __b)
2588 return __aarch64_vgetq_lane_s32 (__a, __b);
2591 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2592 vgetq_lane_s64 (int64x2_t __a, const int __b)
2594 return __aarch64_vgetq_lane_s64 (__a, __b);
2597 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2598 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2600 return __aarch64_vgetq_lane_u8 (__a, __b);
2603 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2604 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2606 return __aarch64_vgetq_lane_u16 (__a, __b);
2609 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2610 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2612 return __aarch64_vgetq_lane_u32 (__a, __b);
2615 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2616 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2618 return __aarch64_vgetq_lane_u64 (__a, __b);
2621 /* vreinterpret */
2623 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2624 vreinterpret_p8_s8 (int8x8_t __a)
2626 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2629 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2630 vreinterpret_p8_s16 (int16x4_t __a)
2632 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2635 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2636 vreinterpret_p8_s32 (int32x2_t __a)
2638 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2641 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2642 vreinterpret_p8_s64 (int64x1_t __a)
2644 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2647 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2648 vreinterpret_p8_f32 (float32x2_t __a)
2650 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2653 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2654 vreinterpret_p8_u8 (uint8x8_t __a)
2656 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2659 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2660 vreinterpret_p8_u16 (uint16x4_t __a)
2662 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2665 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2666 vreinterpret_p8_u32 (uint32x2_t __a)
2668 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2671 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2672 vreinterpret_p8_u64 (uint64x1_t __a)
2674 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2677 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2678 vreinterpret_p8_p16 (poly16x4_t __a)
2680 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2683 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2684 vreinterpretq_p8_s8 (int8x16_t __a)
2686 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2689 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2690 vreinterpretq_p8_s16 (int16x8_t __a)
2692 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2695 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2696 vreinterpretq_p8_s32 (int32x4_t __a)
2698 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2701 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2702 vreinterpretq_p8_s64 (int64x2_t __a)
2704 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2707 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2708 vreinterpretq_p8_f32 (float32x4_t __a)
2710 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2713 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2714 vreinterpretq_p8_u8 (uint8x16_t __a)
2716 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2717 __a);
2720 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2721 vreinterpretq_p8_u16 (uint16x8_t __a)
2723 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2724 __a);
2727 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2728 vreinterpretq_p8_u32 (uint32x4_t __a)
2730 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2731 __a);
2734 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2735 vreinterpretq_p8_u64 (uint64x2_t __a)
2737 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2738 __a);
2741 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2742 vreinterpretq_p8_p16 (poly16x8_t __a)
2744 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2745 __a);
2748 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2749 vreinterpret_p16_s8 (int8x8_t __a)
2751 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2754 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2755 vreinterpret_p16_s16 (int16x4_t __a)
2757 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2760 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2761 vreinterpret_p16_s32 (int32x2_t __a)
2763 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2766 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2767 vreinterpret_p16_s64 (int64x1_t __a)
2769 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2772 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2773 vreinterpret_p16_f32 (float32x2_t __a)
2775 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2778 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2779 vreinterpret_p16_u8 (uint8x8_t __a)
2781 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2784 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2785 vreinterpret_p16_u16 (uint16x4_t __a)
2787 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2790 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2791 vreinterpret_p16_u32 (uint32x2_t __a)
2793 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2796 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2797 vreinterpret_p16_u64 (uint64x1_t __a)
2799 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2802 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2803 vreinterpret_p16_p8 (poly8x8_t __a)
2805 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2808 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2809 vreinterpretq_p16_s8 (int8x16_t __a)
2811 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2814 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2815 vreinterpretq_p16_s16 (int16x8_t __a)
2817 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2820 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2821 vreinterpretq_p16_s32 (int32x4_t __a)
2823 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2826 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2827 vreinterpretq_p16_s64 (int64x2_t __a)
2829 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2832 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2833 vreinterpretq_p16_f32 (float32x4_t __a)
2835 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2838 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2839 vreinterpretq_p16_u8 (uint8x16_t __a)
2841 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2842 __a);
2845 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2846 vreinterpretq_p16_u16 (uint16x8_t __a)
2848 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2851 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2852 vreinterpretq_p16_u32 (uint32x4_t __a)
2854 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2857 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2858 vreinterpretq_p16_u64 (uint64x2_t __a)
2860 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2863 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2864 vreinterpretq_p16_p8 (poly8x16_t __a)
2866 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2867 __a);
2870 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2871 vreinterpret_f32_s8 (int8x8_t __a)
2873 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2876 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2877 vreinterpret_f32_s16 (int16x4_t __a)
2879 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2882 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2883 vreinterpret_f32_s32 (int32x2_t __a)
2885 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2888 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2889 vreinterpret_f32_s64 (int64x1_t __a)
2891 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2894 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2895 vreinterpret_f32_u8 (uint8x8_t __a)
2897 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2900 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2901 vreinterpret_f32_u16 (uint16x4_t __a)
2903 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2904 __a);
2907 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2908 vreinterpret_f32_u32 (uint32x2_t __a)
2910 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2911 __a);
2914 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2915 vreinterpret_f32_u64 (uint64x1_t __a)
2917 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2920 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2921 vreinterpret_f32_p8 (poly8x8_t __a)
2923 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2926 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2927 vreinterpret_f32_p16 (poly16x4_t __a)
2929 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2930 __a);
2933 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2934 vreinterpretq_f32_s8 (int8x16_t __a)
2936 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2939 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2940 vreinterpretq_f32_s16 (int16x8_t __a)
2942 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2945 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2946 vreinterpretq_f32_s32 (int32x4_t __a)
2948 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2951 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2952 vreinterpretq_f32_s64 (int64x2_t __a)
2954 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2957 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2958 vreinterpretq_f32_u8 (uint8x16_t __a)
2960 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2961 __a);
2964 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2965 vreinterpretq_f32_u16 (uint16x8_t __a)
2967 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2968 __a);
2971 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2972 vreinterpretq_f32_u32 (uint32x4_t __a)
2974 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2975 __a);
2978 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2979 vreinterpretq_f32_u64 (uint64x2_t __a)
2981 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
2982 __a);
2985 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2986 vreinterpretq_f32_p8 (poly8x16_t __a)
2988 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2989 __a);
2992 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2993 vreinterpretq_f32_p16 (poly16x8_t __a)
2995 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2996 __a);
2999 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3000 vreinterpret_s64_s8 (int8x8_t __a)
3002 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3005 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3006 vreinterpret_s64_s16 (int16x4_t __a)
3008 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3011 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3012 vreinterpret_s64_s32 (int32x2_t __a)
3014 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3017 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3018 vreinterpret_s64_f32 (float32x2_t __a)
3020 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3023 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3024 vreinterpret_s64_u8 (uint8x8_t __a)
3026 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3029 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3030 vreinterpret_s64_u16 (uint16x4_t __a)
3032 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3035 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3036 vreinterpret_s64_u32 (uint32x2_t __a)
3038 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3041 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3042 vreinterpret_s64_u64 (uint64x1_t __a)
3044 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
3047 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3048 vreinterpret_s64_p8 (poly8x8_t __a)
3050 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3053 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3054 vreinterpret_s64_p16 (poly16x4_t __a)
3056 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3059 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3060 vreinterpretq_s64_s8 (int8x16_t __a)
3062 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3065 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3066 vreinterpretq_s64_s16 (int16x8_t __a)
3068 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3071 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3072 vreinterpretq_s64_s32 (int32x4_t __a)
3074 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3077 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3078 vreinterpretq_s64_f32 (float32x4_t __a)
3080 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3083 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3084 vreinterpretq_s64_u8 (uint8x16_t __a)
3086 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3089 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3090 vreinterpretq_s64_u16 (uint16x8_t __a)
3092 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3095 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3096 vreinterpretq_s64_u32 (uint32x4_t __a)
3098 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3101 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3102 vreinterpretq_s64_u64 (uint64x2_t __a)
3104 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
3107 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3108 vreinterpretq_s64_p8 (poly8x16_t __a)
3110 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3113 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3114 vreinterpretq_s64_p16 (poly16x8_t __a)
3116 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3119 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3120 vreinterpret_u64_s8 (int8x8_t __a)
3122 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3125 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3126 vreinterpret_u64_s16 (int16x4_t __a)
3128 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3131 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3132 vreinterpret_u64_s32 (int32x2_t __a)
3134 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3137 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3138 vreinterpret_u64_s64 (int64x1_t __a)
3140 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
3143 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3144 vreinterpret_u64_f32 (float32x2_t __a)
3146 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3149 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3150 vreinterpret_u64_u8 (uint8x8_t __a)
3152 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3155 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3156 vreinterpret_u64_u16 (uint16x4_t __a)
3158 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3161 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3162 vreinterpret_u64_u32 (uint32x2_t __a)
3164 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3167 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3168 vreinterpret_u64_p8 (poly8x8_t __a)
3170 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3173 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3174 vreinterpret_u64_p16 (poly16x4_t __a)
3176 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3179 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3180 vreinterpretq_u64_s8 (int8x16_t __a)
3182 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3185 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3186 vreinterpretq_u64_s16 (int16x8_t __a)
3188 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3191 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3192 vreinterpretq_u64_s32 (int32x4_t __a)
3194 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3197 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3198 vreinterpretq_u64_s64 (int64x2_t __a)
3200 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3203 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3204 vreinterpretq_u64_f32 (float32x4_t __a)
3206 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3209 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3210 vreinterpretq_u64_u8 (uint8x16_t __a)
3212 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3213 __a);
3216 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3217 vreinterpretq_u64_u16 (uint16x8_t __a)
3219 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3222 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3223 vreinterpretq_u64_u32 (uint32x4_t __a)
3225 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3228 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3229 vreinterpretq_u64_p8 (poly8x16_t __a)
3231 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3232 __a);
3235 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3236 vreinterpretq_u64_p16 (poly16x8_t __a)
3238 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3241 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3242 vreinterpret_s8_s16 (int16x4_t __a)
3244 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3247 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3248 vreinterpret_s8_s32 (int32x2_t __a)
3250 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3253 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3254 vreinterpret_s8_s64 (int64x1_t __a)
3256 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3259 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3260 vreinterpret_s8_f32 (float32x2_t __a)
3262 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3265 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3266 vreinterpret_s8_u8 (uint8x8_t __a)
3268 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3271 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3272 vreinterpret_s8_u16 (uint16x4_t __a)
3274 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3277 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3278 vreinterpret_s8_u32 (uint32x2_t __a)
3280 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3283 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3284 vreinterpret_s8_u64 (uint64x1_t __a)
3286 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3289 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3290 vreinterpret_s8_p8 (poly8x8_t __a)
3292 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3295 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3296 vreinterpret_s8_p16 (poly16x4_t __a)
3298 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3301 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3302 vreinterpretq_s8_s16 (int16x8_t __a)
3304 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3307 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3308 vreinterpretq_s8_s32 (int32x4_t __a)
3310 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3313 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3314 vreinterpretq_s8_s64 (int64x2_t __a)
3316 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3319 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3320 vreinterpretq_s8_f32 (float32x4_t __a)
3322 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3325 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3326 vreinterpretq_s8_u8 (uint8x16_t __a)
3328 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3329 __a);
3332 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3333 vreinterpretq_s8_u16 (uint16x8_t __a)
3335 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3338 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3339 vreinterpretq_s8_u32 (uint32x4_t __a)
3341 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3344 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3345 vreinterpretq_s8_u64 (uint64x2_t __a)
3347 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3350 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3351 vreinterpretq_s8_p8 (poly8x16_t __a)
3353 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3354 __a);
3357 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3358 vreinterpretq_s8_p16 (poly16x8_t __a)
3360 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3363 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3364 vreinterpret_s16_s8 (int8x8_t __a)
3366 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3369 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3370 vreinterpret_s16_s32 (int32x2_t __a)
3372 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3375 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3376 vreinterpret_s16_s64 (int64x1_t __a)
3378 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3381 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3382 vreinterpret_s16_f32 (float32x2_t __a)
3384 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3387 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3388 vreinterpret_s16_u8 (uint8x8_t __a)
3390 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3393 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3394 vreinterpret_s16_u16 (uint16x4_t __a)
3396 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3399 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3400 vreinterpret_s16_u32 (uint32x2_t __a)
3402 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3405 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3406 vreinterpret_s16_u64 (uint64x1_t __a)
3408 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3411 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3412 vreinterpret_s16_p8 (poly8x8_t __a)
3414 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3417 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3418 vreinterpret_s16_p16 (poly16x4_t __a)
3420 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3423 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3424 vreinterpretq_s16_s8 (int8x16_t __a)
3426 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3429 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3430 vreinterpretq_s16_s32 (int32x4_t __a)
3432 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3435 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3436 vreinterpretq_s16_s64 (int64x2_t __a)
3438 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3441 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3442 vreinterpretq_s16_f32 (float32x4_t __a)
3444 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3447 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3448 vreinterpretq_s16_u8 (uint8x16_t __a)
3450 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3453 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3454 vreinterpretq_s16_u16 (uint16x8_t __a)
3456 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3459 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3460 vreinterpretq_s16_u32 (uint32x4_t __a)
3462 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3465 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3466 vreinterpretq_s16_u64 (uint64x2_t __a)
3468 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3471 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3472 vreinterpretq_s16_p8 (poly8x16_t __a)
3474 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3477 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3478 vreinterpretq_s16_p16 (poly16x8_t __a)
3480 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3483 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3484 vreinterpret_s32_s8 (int8x8_t __a)
3486 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3489 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3490 vreinterpret_s32_s16 (int16x4_t __a)
3492 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3495 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3496 vreinterpret_s32_s64 (int64x1_t __a)
3498 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3501 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3502 vreinterpret_s32_f32 (float32x2_t __a)
3504 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3507 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3508 vreinterpret_s32_u8 (uint8x8_t __a)
3510 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3513 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3514 vreinterpret_s32_u16 (uint16x4_t __a)
3516 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3519 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3520 vreinterpret_s32_u32 (uint32x2_t __a)
3522 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3525 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3526 vreinterpret_s32_u64 (uint64x1_t __a)
3528 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3531 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3532 vreinterpret_s32_p8 (poly8x8_t __a)
3534 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3537 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3538 vreinterpret_s32_p16 (poly16x4_t __a)
3540 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3543 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3544 vreinterpretq_s32_s8 (int8x16_t __a)
3546 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3549 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3550 vreinterpretq_s32_s16 (int16x8_t __a)
3552 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3555 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3556 vreinterpretq_s32_s64 (int64x2_t __a)
3558 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3561 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3562 vreinterpretq_s32_f32 (float32x4_t __a)
3564 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3567 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3568 vreinterpretq_s32_u8 (uint8x16_t __a)
3570 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3573 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3574 vreinterpretq_s32_u16 (uint16x8_t __a)
3576 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3579 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3580 vreinterpretq_s32_u32 (uint32x4_t __a)
3582 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3585 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3586 vreinterpretq_s32_u64 (uint64x2_t __a)
3588 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3591 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3592 vreinterpretq_s32_p8 (poly8x16_t __a)
3594 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3597 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3598 vreinterpretq_s32_p16 (poly16x8_t __a)
3600 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3603 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3604 vreinterpret_u8_s8 (int8x8_t __a)
3606 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3609 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3610 vreinterpret_u8_s16 (int16x4_t __a)
3612 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3615 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3616 vreinterpret_u8_s32 (int32x2_t __a)
3618 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3621 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3622 vreinterpret_u8_s64 (int64x1_t __a)
3624 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3627 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3628 vreinterpret_u8_f32 (float32x2_t __a)
3630 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3633 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3634 vreinterpret_u8_u16 (uint16x4_t __a)
3636 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3639 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3640 vreinterpret_u8_u32 (uint32x2_t __a)
3642 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3645 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3646 vreinterpret_u8_u64 (uint64x1_t __a)
3648 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3651 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3652 vreinterpret_u8_p8 (poly8x8_t __a)
3654 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3657 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3658 vreinterpret_u8_p16 (poly16x4_t __a)
3660 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3663 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3664 vreinterpretq_u8_s8 (int8x16_t __a)
3666 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3669 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3670 vreinterpretq_u8_s16 (int16x8_t __a)
3672 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3675 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3676 vreinterpretq_u8_s32 (int32x4_t __a)
3678 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3681 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3682 vreinterpretq_u8_s64 (int64x2_t __a)
3684 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3687 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3688 vreinterpretq_u8_f32 (float32x4_t __a)
3690 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3693 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3694 vreinterpretq_u8_u16 (uint16x8_t __a)
3696 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3697 __a);
3700 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3701 vreinterpretq_u8_u32 (uint32x4_t __a)
3703 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3704 __a);
3707 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3708 vreinterpretq_u8_u64 (uint64x2_t __a)
3710 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3711 __a);
3714 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3715 vreinterpretq_u8_p8 (poly8x16_t __a)
3717 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3718 __a);
3721 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3722 vreinterpretq_u8_p16 (poly16x8_t __a)
3724 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3725 __a);
3728 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3729 vreinterpret_u16_s8 (int8x8_t __a)
3731 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3734 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3735 vreinterpret_u16_s16 (int16x4_t __a)
3737 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3740 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3741 vreinterpret_u16_s32 (int32x2_t __a)
3743 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3746 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3747 vreinterpret_u16_s64 (int64x1_t __a)
3749 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3752 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3753 vreinterpret_u16_f32 (float32x2_t __a)
3755 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3758 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3759 vreinterpret_u16_u8 (uint8x8_t __a)
3761 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3764 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3765 vreinterpret_u16_u32 (uint32x2_t __a)
3767 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3770 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3771 vreinterpret_u16_u64 (uint64x1_t __a)
3773 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3776 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3777 vreinterpret_u16_p8 (poly8x8_t __a)
3779 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3782 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3783 vreinterpret_u16_p16 (poly16x4_t __a)
3785 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3788 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3789 vreinterpretq_u16_s8 (int8x16_t __a)
3791 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3794 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3795 vreinterpretq_u16_s16 (int16x8_t __a)
3797 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3800 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3801 vreinterpretq_u16_s32 (int32x4_t __a)
3803 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3806 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3807 vreinterpretq_u16_s64 (int64x2_t __a)
3809 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3812 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3813 vreinterpretq_u16_f32 (float32x4_t __a)
3815 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3818 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3819 vreinterpretq_u16_u8 (uint8x16_t __a)
3821 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3822 __a);
3825 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3826 vreinterpretq_u16_u32 (uint32x4_t __a)
3828 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3831 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3832 vreinterpretq_u16_u64 (uint64x2_t __a)
3834 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3837 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3838 vreinterpretq_u16_p8 (poly8x16_t __a)
3840 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3841 __a);
3844 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3845 vreinterpretq_u16_p16 (poly16x8_t __a)
3847 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3850 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3851 vreinterpret_u32_s8 (int8x8_t __a)
3853 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3856 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3857 vreinterpret_u32_s16 (int16x4_t __a)
3859 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3862 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3863 vreinterpret_u32_s32 (int32x2_t __a)
3865 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3868 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3869 vreinterpret_u32_s64 (int64x1_t __a)
3871 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3874 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3875 vreinterpret_u32_f32 (float32x2_t __a)
3877 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3880 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3881 vreinterpret_u32_u8 (uint8x8_t __a)
3883 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3886 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3887 vreinterpret_u32_u16 (uint16x4_t __a)
3889 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3892 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3893 vreinterpret_u32_u64 (uint64x1_t __a)
3895 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3898 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3899 vreinterpret_u32_p8 (poly8x8_t __a)
3901 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3904 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3905 vreinterpret_u32_p16 (poly16x4_t __a)
3907 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3910 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3911 vreinterpretq_u32_s8 (int8x16_t __a)
3913 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3916 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3917 vreinterpretq_u32_s16 (int16x8_t __a)
3919 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3922 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3923 vreinterpretq_u32_s32 (int32x4_t __a)
3925 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3928 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3929 vreinterpretq_u32_s64 (int64x2_t __a)
3931 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3934 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3935 vreinterpretq_u32_f32 (float32x4_t __a)
3937 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3940 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3941 vreinterpretq_u32_u8 (uint8x16_t __a)
3943 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3944 __a);
3947 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3948 vreinterpretq_u32_u16 (uint16x8_t __a)
3950 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3953 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3954 vreinterpretq_u32_u64 (uint64x2_t __a)
3956 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3959 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3960 vreinterpretq_u32_p8 (poly8x16_t __a)
3962 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3963 __a);
3966 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3967 vreinterpretq_u32_p16 (poly16x8_t __a)
3969 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3972 #define __GET_LOW(__TYPE) \
3973 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
3974 uint64_t lo = vgetq_lane_u64 (tmp, 0); \
3975 return vreinterpret_##__TYPE##_u64 (lo);
3977 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3978 vget_low_f32 (float32x4_t __a)
3980 __GET_LOW (f32);
3983 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
3984 vget_low_f64 (float64x2_t __a)
3986 return vgetq_lane_f64 (__a, 0);
3989 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
3990 vget_low_p8 (poly8x16_t __a)
3992 __GET_LOW (p8);
3995 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3996 vget_low_p16 (poly16x8_t __a)
3998 __GET_LOW (p16);
4001 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4002 vget_low_s8 (int8x16_t __a)
4004 __GET_LOW (s8);
4007 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4008 vget_low_s16 (int16x8_t __a)
4010 __GET_LOW (s16);
4013 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4014 vget_low_s32 (int32x4_t __a)
4016 __GET_LOW (s32);
4019 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4020 vget_low_s64 (int64x2_t __a)
4022 return vgetq_lane_s64 (__a, 0);
4025 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4026 vget_low_u8 (uint8x16_t __a)
4028 __GET_LOW (u8);
4031 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4032 vget_low_u16 (uint16x8_t __a)
4034 __GET_LOW (u16);
4037 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4038 vget_low_u32 (uint32x4_t __a)
4040 __GET_LOW (u32);
4043 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4044 vget_low_u64 (uint64x2_t __a)
4046 return vgetq_lane_u64 (__a, 0);
4049 #undef __GET_LOW
4051 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4052 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4054 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4057 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4058 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4060 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4063 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4064 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4066 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4069 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4070 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4072 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
4075 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4076 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4078 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4081 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4082 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4084 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4085 (int8x8_t) __b);
4088 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4089 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4091 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4092 (int16x4_t) __b);
4095 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4096 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4098 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4099 (int32x2_t) __b);
4102 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4103 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4105 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
4106 (int64x1_t) __b);
4109 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4110 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4112 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
4115 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4116 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4118 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4119 (int8x8_t) __b);
4122 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4123 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4125 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4126 (int16x4_t) __b);
4129 /* Start of temporary inline asm implementations. */
4131 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4132 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4134 int8x8_t result;
4135 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4136 : "=w"(result)
4137 : "0"(a), "w"(b), "w"(c)
4138 : /* No clobbers */);
4139 return result;
4142 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4143 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4145 int16x4_t result;
4146 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4147 : "=w"(result)
4148 : "0"(a), "w"(b), "w"(c)
4149 : /* No clobbers */);
4150 return result;
4153 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4154 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4156 int32x2_t result;
4157 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4158 : "=w"(result)
4159 : "0"(a), "w"(b), "w"(c)
4160 : /* No clobbers */);
4161 return result;
4164 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4165 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4167 uint8x8_t result;
4168 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4169 : "=w"(result)
4170 : "0"(a), "w"(b), "w"(c)
4171 : /* No clobbers */);
4172 return result;
4175 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4176 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4178 uint16x4_t result;
4179 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4180 : "=w"(result)
4181 : "0"(a), "w"(b), "w"(c)
4182 : /* No clobbers */);
4183 return result;
4186 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4187 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4189 uint32x2_t result;
4190 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4191 : "=w"(result)
4192 : "0"(a), "w"(b), "w"(c)
4193 : /* No clobbers */);
4194 return result;
4197 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4198 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4200 int16x8_t result;
4201 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4202 : "=w"(result)
4203 : "0"(a), "w"(b), "w"(c)
4204 : /* No clobbers */);
4205 return result;
4208 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4209 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4211 int32x4_t result;
4212 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4213 : "=w"(result)
4214 : "0"(a), "w"(b), "w"(c)
4215 : /* No clobbers */);
4216 return result;
4219 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4220 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4222 int64x2_t result;
4223 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4224 : "=w"(result)
4225 : "0"(a), "w"(b), "w"(c)
4226 : /* No clobbers */);
4227 return result;
4230 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4231 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4233 uint16x8_t result;
4234 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4235 : "=w"(result)
4236 : "0"(a), "w"(b), "w"(c)
4237 : /* No clobbers */);
4238 return result;
4241 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4242 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4244 uint32x4_t result;
4245 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4246 : "=w"(result)
4247 : "0"(a), "w"(b), "w"(c)
4248 : /* No clobbers */);
4249 return result;
4252 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4253 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4255 uint64x2_t result;
4256 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4257 : "=w"(result)
4258 : "0"(a), "w"(b), "w"(c)
4259 : /* No clobbers */);
4260 return result;
4263 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4264 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4266 int16x8_t result;
4267 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4268 : "=w"(result)
4269 : "0"(a), "w"(b), "w"(c)
4270 : /* No clobbers */);
4271 return result;
4274 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4275 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4277 int32x4_t result;
4278 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4279 : "=w"(result)
4280 : "0"(a), "w"(b), "w"(c)
4281 : /* No clobbers */);
4282 return result;
4285 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4286 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4288 int64x2_t result;
4289 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4290 : "=w"(result)
4291 : "0"(a), "w"(b), "w"(c)
4292 : /* No clobbers */);
4293 return result;
4296 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4297 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4299 uint16x8_t result;
4300 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4301 : "=w"(result)
4302 : "0"(a), "w"(b), "w"(c)
4303 : /* No clobbers */);
4304 return result;
4307 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4308 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4310 uint32x4_t result;
4311 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4312 : "=w"(result)
4313 : "0"(a), "w"(b), "w"(c)
4314 : /* No clobbers */);
4315 return result;
4318 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4319 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4321 uint64x2_t result;
4322 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4323 : "=w"(result)
4324 : "0"(a), "w"(b), "w"(c)
4325 : /* No clobbers */);
4326 return result;
4329 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4330 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4332 int8x16_t result;
4333 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4334 : "=w"(result)
4335 : "0"(a), "w"(b), "w"(c)
4336 : /* No clobbers */);
4337 return result;
4340 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4341 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4343 int16x8_t result;
4344 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4345 : "=w"(result)
4346 : "0"(a), "w"(b), "w"(c)
4347 : /* No clobbers */);
4348 return result;
4351 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4352 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4354 int32x4_t result;
4355 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4356 : "=w"(result)
4357 : "0"(a), "w"(b), "w"(c)
4358 : /* No clobbers */);
4359 return result;
4362 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4363 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4365 uint8x16_t result;
4366 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4367 : "=w"(result)
4368 : "0"(a), "w"(b), "w"(c)
4369 : /* No clobbers */);
4370 return result;
4373 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4374 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4376 uint16x8_t result;
4377 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4378 : "=w"(result)
4379 : "0"(a), "w"(b), "w"(c)
4380 : /* No clobbers */);
4381 return result;
4384 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4385 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4387 uint32x4_t result;
4388 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4389 : "=w"(result)
4390 : "0"(a), "w"(b), "w"(c)
4391 : /* No clobbers */);
4392 return result;
4395 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4396 vabd_f32 (float32x2_t a, float32x2_t b)
4398 float32x2_t result;
4399 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4400 : "=w"(result)
4401 : "w"(a), "w"(b)
4402 : /* No clobbers */);
4403 return result;
4406 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4407 vabd_s8 (int8x8_t a, int8x8_t b)
4409 int8x8_t result;
4410 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4411 : "=w"(result)
4412 : "w"(a), "w"(b)
4413 : /* No clobbers */);
4414 return result;
4417 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4418 vabd_s16 (int16x4_t a, int16x4_t b)
4420 int16x4_t result;
4421 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4422 : "=w"(result)
4423 : "w"(a), "w"(b)
4424 : /* No clobbers */);
4425 return result;
4428 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4429 vabd_s32 (int32x2_t a, int32x2_t b)
4431 int32x2_t result;
4432 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4433 : "=w"(result)
4434 : "w"(a), "w"(b)
4435 : /* No clobbers */);
4436 return result;
4439 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4440 vabd_u8 (uint8x8_t a, uint8x8_t b)
4442 uint8x8_t result;
4443 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4444 : "=w"(result)
4445 : "w"(a), "w"(b)
4446 : /* No clobbers */);
4447 return result;
4450 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4451 vabd_u16 (uint16x4_t a, uint16x4_t b)
4453 uint16x4_t result;
4454 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4455 : "=w"(result)
4456 : "w"(a), "w"(b)
4457 : /* No clobbers */);
4458 return result;
4461 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4462 vabd_u32 (uint32x2_t a, uint32x2_t b)
4464 uint32x2_t result;
4465 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4466 : "=w"(result)
4467 : "w"(a), "w"(b)
4468 : /* No clobbers */);
4469 return result;
4472 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4473 vabdd_f64 (float64_t a, float64_t b)
4475 float64_t result;
4476 __asm__ ("fabd %d0, %d1, %d2"
4477 : "=w"(result)
4478 : "w"(a), "w"(b)
4479 : /* No clobbers */);
4480 return result;
4483 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4484 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4486 int16x8_t result;
4487 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4488 : "=w"(result)
4489 : "w"(a), "w"(b)
4490 : /* No clobbers */);
4491 return result;
4494 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4495 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4497 int32x4_t result;
4498 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4499 : "=w"(result)
4500 : "w"(a), "w"(b)
4501 : /* No clobbers */);
4502 return result;
4505 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4506 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4508 int64x2_t result;
4509 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4510 : "=w"(result)
4511 : "w"(a), "w"(b)
4512 : /* No clobbers */);
4513 return result;
4516 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4517 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4519 uint16x8_t result;
4520 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4521 : "=w"(result)
4522 : "w"(a), "w"(b)
4523 : /* No clobbers */);
4524 return result;
4527 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4528 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4530 uint32x4_t result;
4531 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4532 : "=w"(result)
4533 : "w"(a), "w"(b)
4534 : /* No clobbers */);
4535 return result;
4538 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4539 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4541 uint64x2_t result;
4542 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4543 : "=w"(result)
4544 : "w"(a), "w"(b)
4545 : /* No clobbers */);
4546 return result;
4549 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4550 vabdl_s8 (int8x8_t a, int8x8_t b)
4552 int16x8_t result;
4553 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4554 : "=w"(result)
4555 : "w"(a), "w"(b)
4556 : /* No clobbers */);
4557 return result;
4560 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4561 vabdl_s16 (int16x4_t a, int16x4_t b)
4563 int32x4_t result;
4564 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4565 : "=w"(result)
4566 : "w"(a), "w"(b)
4567 : /* No clobbers */);
4568 return result;
4571 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4572 vabdl_s32 (int32x2_t a, int32x2_t b)
4574 int64x2_t result;
4575 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4576 : "=w"(result)
4577 : "w"(a), "w"(b)
4578 : /* No clobbers */);
4579 return result;
4582 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4583 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4585 uint16x8_t result;
4586 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4587 : "=w"(result)
4588 : "w"(a), "w"(b)
4589 : /* No clobbers */);
4590 return result;
4593 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4594 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4596 uint32x4_t result;
4597 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4598 : "=w"(result)
4599 : "w"(a), "w"(b)
4600 : /* No clobbers */);
4601 return result;
4604 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4605 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4607 uint64x2_t result;
4608 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4609 : "=w"(result)
4610 : "w"(a), "w"(b)
4611 : /* No clobbers */);
4612 return result;
4615 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4616 vabdq_f32 (float32x4_t a, float32x4_t b)
4618 float32x4_t result;
4619 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4620 : "=w"(result)
4621 : "w"(a), "w"(b)
4622 : /* No clobbers */);
4623 return result;
4626 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4627 vabdq_f64 (float64x2_t a, float64x2_t b)
4629 float64x2_t result;
4630 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4631 : "=w"(result)
4632 : "w"(a), "w"(b)
4633 : /* No clobbers */);
4634 return result;
4637 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4638 vabdq_s8 (int8x16_t a, int8x16_t b)
4640 int8x16_t result;
4641 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4642 : "=w"(result)
4643 : "w"(a), "w"(b)
4644 : /* No clobbers */);
4645 return result;
4648 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4649 vabdq_s16 (int16x8_t a, int16x8_t b)
4651 int16x8_t result;
4652 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4653 : "=w"(result)
4654 : "w"(a), "w"(b)
4655 : /* No clobbers */);
4656 return result;
4659 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4660 vabdq_s32 (int32x4_t a, int32x4_t b)
4662 int32x4_t result;
4663 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4664 : "=w"(result)
4665 : "w"(a), "w"(b)
4666 : /* No clobbers */);
4667 return result;
4670 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4671 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4673 uint8x16_t result;
4674 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4675 : "=w"(result)
4676 : "w"(a), "w"(b)
4677 : /* No clobbers */);
4678 return result;
4681 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4682 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4684 uint16x8_t result;
4685 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4686 : "=w"(result)
4687 : "w"(a), "w"(b)
4688 : /* No clobbers */);
4689 return result;
4692 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4693 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4695 uint32x4_t result;
4696 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4697 : "=w"(result)
4698 : "w"(a), "w"(b)
4699 : /* No clobbers */);
4700 return result;
4703 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4704 vabds_f32 (float32_t a, float32_t b)
4706 float32_t result;
4707 __asm__ ("fabd %s0, %s1, %s2"
4708 : "=w"(result)
4709 : "w"(a), "w"(b)
4710 : /* No clobbers */);
4711 return result;
4714 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4715 vaddlv_s8 (int8x8_t a)
4717 int16_t result;
4718 __asm__ ("saddlv %h0,%1.8b"
4719 : "=w"(result)
4720 : "w"(a)
4721 : /* No clobbers */);
4722 return result;
4725 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4726 vaddlv_s16 (int16x4_t a)
4728 int32_t result;
4729 __asm__ ("saddlv %s0,%1.4h"
4730 : "=w"(result)
4731 : "w"(a)
4732 : /* No clobbers */);
4733 return result;
4736 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4737 vaddlv_u8 (uint8x8_t a)
4739 uint16_t result;
4740 __asm__ ("uaddlv %h0,%1.8b"
4741 : "=w"(result)
4742 : "w"(a)
4743 : /* No clobbers */);
4744 return result;
4747 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4748 vaddlv_u16 (uint16x4_t a)
4750 uint32_t result;
4751 __asm__ ("uaddlv %s0,%1.4h"
4752 : "=w"(result)
4753 : "w"(a)
4754 : /* No clobbers */);
4755 return result;
4758 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4759 vaddlvq_s8 (int8x16_t a)
4761 int16_t result;
4762 __asm__ ("saddlv %h0,%1.16b"
4763 : "=w"(result)
4764 : "w"(a)
4765 : /* No clobbers */);
4766 return result;
4769 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4770 vaddlvq_s16 (int16x8_t a)
4772 int32_t result;
4773 __asm__ ("saddlv %s0,%1.8h"
4774 : "=w"(result)
4775 : "w"(a)
4776 : /* No clobbers */);
4777 return result;
4780 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
4781 vaddlvq_s32 (int32x4_t a)
4783 int64_t result;
4784 __asm__ ("saddlv %d0,%1.4s"
4785 : "=w"(result)
4786 : "w"(a)
4787 : /* No clobbers */);
4788 return result;
4791 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4792 vaddlvq_u8 (uint8x16_t a)
4794 uint16_t result;
4795 __asm__ ("uaddlv %h0,%1.16b"
4796 : "=w"(result)
4797 : "w"(a)
4798 : /* No clobbers */);
4799 return result;
4802 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4803 vaddlvq_u16 (uint16x8_t a)
4805 uint32_t result;
4806 __asm__ ("uaddlv %s0,%1.8h"
4807 : "=w"(result)
4808 : "w"(a)
4809 : /* No clobbers */);
4810 return result;
4813 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
4814 vaddlvq_u32 (uint32x4_t a)
4816 uint64_t result;
4817 __asm__ ("uaddlv %d0,%1.4s"
4818 : "=w"(result)
4819 : "w"(a)
4820 : /* No clobbers */);
4821 return result;
4824 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4825 vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
4827 float32x2_t result;
4828 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4829 : "=w"(result)
4830 : "0"(a), "w"(b), "w"(c)
4831 : /* No clobbers */);
4832 return result;
4835 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4836 vbsl_p8 (uint8x8_t a, poly8x8_t b, poly8x8_t c)
4838 poly8x8_t result;
4839 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4840 : "=w"(result)
4841 : "0"(a), "w"(b), "w"(c)
4842 : /* No clobbers */);
4843 return result;
4846 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4847 vbsl_p16 (uint16x4_t a, poly16x4_t b, poly16x4_t c)
4849 poly16x4_t result;
4850 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4851 : "=w"(result)
4852 : "0"(a), "w"(b), "w"(c)
4853 : /* No clobbers */);
4854 return result;
4857 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4858 vbsl_s8 (uint8x8_t a, int8x8_t b, int8x8_t c)
4860 int8x8_t result;
4861 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4862 : "=w"(result)
4863 : "0"(a), "w"(b), "w"(c)
4864 : /* No clobbers */);
4865 return result;
4868 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4869 vbsl_s16 (uint16x4_t a, int16x4_t b, int16x4_t c)
4871 int16x4_t result;
4872 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4873 : "=w"(result)
4874 : "0"(a), "w"(b), "w"(c)
4875 : /* No clobbers */);
4876 return result;
4879 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4880 vbsl_s32 (uint32x2_t a, int32x2_t b, int32x2_t c)
4882 int32x2_t result;
4883 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4884 : "=w"(result)
4885 : "0"(a), "w"(b), "w"(c)
4886 : /* No clobbers */);
4887 return result;
4890 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4891 vbsl_s64 (uint64x1_t a, int64x1_t b, int64x1_t c)
4893 int64x1_t result;
4894 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4895 : "=w"(result)
4896 : "0"(a), "w"(b), "w"(c)
4897 : /* No clobbers */);
4898 return result;
4901 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4902 vbsl_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4904 uint8x8_t result;
4905 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4906 : "=w"(result)
4907 : "0"(a), "w"(b), "w"(c)
4908 : /* No clobbers */);
4909 return result;
4912 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4913 vbsl_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4915 uint16x4_t result;
4916 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4917 : "=w"(result)
4918 : "0"(a), "w"(b), "w"(c)
4919 : /* No clobbers */);
4920 return result;
4923 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4924 vbsl_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4926 uint32x2_t result;
4927 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4928 : "=w"(result)
4929 : "0"(a), "w"(b), "w"(c)
4930 : /* No clobbers */);
4931 return result;
4934 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4935 vbsl_u64 (uint64x1_t a, uint64x1_t b, uint64x1_t c)
4937 uint64x1_t result;
4938 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4939 : "=w"(result)
4940 : "0"(a), "w"(b), "w"(c)
4941 : /* No clobbers */);
4942 return result;
4945 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4946 vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c)
4948 float32x4_t result;
4949 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4950 : "=w"(result)
4951 : "0"(a), "w"(b), "w"(c)
4952 : /* No clobbers */);
4953 return result;
4956 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4957 vbslq_f64 (uint64x2_t a, float64x2_t b, float64x2_t c)
4959 float64x2_t result;
4960 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4961 : "=w"(result)
4962 : "0"(a), "w"(b), "w"(c)
4963 : /* No clobbers */);
4964 return result;
4967 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4968 vbslq_p8 (uint8x16_t a, poly8x16_t b, poly8x16_t c)
4970 poly8x16_t result;
4971 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4972 : "=w"(result)
4973 : "0"(a), "w"(b), "w"(c)
4974 : /* No clobbers */);
4975 return result;
4978 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4979 vbslq_p16 (uint16x8_t a, poly16x8_t b, poly16x8_t c)
4981 poly16x8_t result;
4982 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4983 : "=w"(result)
4984 : "0"(a), "w"(b), "w"(c)
4985 : /* No clobbers */);
4986 return result;
4989 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4990 vbslq_s8 (uint8x16_t a, int8x16_t b, int8x16_t c)
4992 int8x16_t result;
4993 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4994 : "=w"(result)
4995 : "0"(a), "w"(b), "w"(c)
4996 : /* No clobbers */);
4997 return result;
5000 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5001 vbslq_s16 (uint16x8_t a, int16x8_t b, int16x8_t c)
5003 int16x8_t result;
5004 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5005 : "=w"(result)
5006 : "0"(a), "w"(b), "w"(c)
5007 : /* No clobbers */);
5008 return result;
5011 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5012 vbslq_s32 (uint32x4_t a, int32x4_t b, int32x4_t c)
5014 int32x4_t result;
5015 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5016 : "=w"(result)
5017 : "0"(a), "w"(b), "w"(c)
5018 : /* No clobbers */);
5019 return result;
5022 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5023 vbslq_s64 (uint64x2_t a, int64x2_t b, int64x2_t c)
5025 int64x2_t result;
5026 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5027 : "=w"(result)
5028 : "0"(a), "w"(b), "w"(c)
5029 : /* No clobbers */);
5030 return result;
5033 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5034 vbslq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
5036 uint8x16_t result;
5037 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5038 : "=w"(result)
5039 : "0"(a), "w"(b), "w"(c)
5040 : /* No clobbers */);
5041 return result;
5044 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5045 vbslq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
5047 uint16x8_t result;
5048 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5049 : "=w"(result)
5050 : "0"(a), "w"(b), "w"(c)
5051 : /* No clobbers */);
5052 return result;
5055 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5056 vbslq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
5058 uint32x4_t result;
5059 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5060 : "=w"(result)
5061 : "0"(a), "w"(b), "w"(c)
5062 : /* No clobbers */);
5063 return result;
5066 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5067 vbslq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
5069 uint64x2_t result;
5070 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5071 : "=w"(result)
5072 : "0"(a), "w"(b), "w"(c)
5073 : /* No clobbers */);
5074 return result;
5077 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5078 vcls_s8 (int8x8_t a)
5080 int8x8_t result;
5081 __asm__ ("cls %0.8b,%1.8b"
5082 : "=w"(result)
5083 : "w"(a)
5084 : /* No clobbers */);
5085 return result;
5088 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5089 vcls_s16 (int16x4_t a)
5091 int16x4_t result;
5092 __asm__ ("cls %0.4h,%1.4h"
5093 : "=w"(result)
5094 : "w"(a)
5095 : /* No clobbers */);
5096 return result;
5099 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5100 vcls_s32 (int32x2_t a)
5102 int32x2_t result;
5103 __asm__ ("cls %0.2s,%1.2s"
5104 : "=w"(result)
5105 : "w"(a)
5106 : /* No clobbers */);
5107 return result;
5110 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5111 vclsq_s8 (int8x16_t a)
5113 int8x16_t result;
5114 __asm__ ("cls %0.16b,%1.16b"
5115 : "=w"(result)
5116 : "w"(a)
5117 : /* No clobbers */);
5118 return result;
5121 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5122 vclsq_s16 (int16x8_t a)
5124 int16x8_t result;
5125 __asm__ ("cls %0.8h,%1.8h"
5126 : "=w"(result)
5127 : "w"(a)
5128 : /* No clobbers */);
5129 return result;
5132 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5133 vclsq_s32 (int32x4_t a)
5135 int32x4_t result;
5136 __asm__ ("cls %0.4s,%1.4s"
5137 : "=w"(result)
5138 : "w"(a)
5139 : /* No clobbers */);
5140 return result;
5143 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5144 vclz_s8 (int8x8_t a)
5146 int8x8_t result;
5147 __asm__ ("clz %0.8b,%1.8b"
5148 : "=w"(result)
5149 : "w"(a)
5150 : /* No clobbers */);
5151 return result;
5154 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5155 vclz_s16 (int16x4_t a)
5157 int16x4_t result;
5158 __asm__ ("clz %0.4h,%1.4h"
5159 : "=w"(result)
5160 : "w"(a)
5161 : /* No clobbers */);
5162 return result;
5165 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5166 vclz_s32 (int32x2_t a)
5168 int32x2_t result;
5169 __asm__ ("clz %0.2s,%1.2s"
5170 : "=w"(result)
5171 : "w"(a)
5172 : /* No clobbers */);
5173 return result;
5176 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5177 vclz_u8 (uint8x8_t a)
5179 uint8x8_t result;
5180 __asm__ ("clz %0.8b,%1.8b"
5181 : "=w"(result)
5182 : "w"(a)
5183 : /* No clobbers */);
5184 return result;
5187 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5188 vclz_u16 (uint16x4_t a)
5190 uint16x4_t result;
5191 __asm__ ("clz %0.4h,%1.4h"
5192 : "=w"(result)
5193 : "w"(a)
5194 : /* No clobbers */);
5195 return result;
5198 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5199 vclz_u32 (uint32x2_t a)
5201 uint32x2_t result;
5202 __asm__ ("clz %0.2s,%1.2s"
5203 : "=w"(result)
5204 : "w"(a)
5205 : /* No clobbers */);
5206 return result;
5209 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5210 vclzq_s8 (int8x16_t a)
5212 int8x16_t result;
5213 __asm__ ("clz %0.16b,%1.16b"
5214 : "=w"(result)
5215 : "w"(a)
5216 : /* No clobbers */);
5217 return result;
5220 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5221 vclzq_s16 (int16x8_t a)
5223 int16x8_t result;
5224 __asm__ ("clz %0.8h,%1.8h"
5225 : "=w"(result)
5226 : "w"(a)
5227 : /* No clobbers */);
5228 return result;
5231 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5232 vclzq_s32 (int32x4_t a)
5234 int32x4_t result;
5235 __asm__ ("clz %0.4s,%1.4s"
5236 : "=w"(result)
5237 : "w"(a)
5238 : /* No clobbers */);
5239 return result;
5242 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5243 vclzq_u8 (uint8x16_t a)
5245 uint8x16_t result;
5246 __asm__ ("clz %0.16b,%1.16b"
5247 : "=w"(result)
5248 : "w"(a)
5249 : /* No clobbers */);
5250 return result;
5253 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5254 vclzq_u16 (uint16x8_t a)
5256 uint16x8_t result;
5257 __asm__ ("clz %0.8h,%1.8h"
5258 : "=w"(result)
5259 : "w"(a)
5260 : /* No clobbers */);
5261 return result;
5264 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5265 vclzq_u32 (uint32x4_t a)
5267 uint32x4_t result;
5268 __asm__ ("clz %0.4s,%1.4s"
5269 : "=w"(result)
5270 : "w"(a)
5271 : /* No clobbers */);
5272 return result;
5275 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5276 vcnt_p8 (poly8x8_t a)
5278 poly8x8_t result;
5279 __asm__ ("cnt %0.8b,%1.8b"
5280 : "=w"(result)
5281 : "w"(a)
5282 : /* No clobbers */);
5283 return result;
5286 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5287 vcnt_s8 (int8x8_t a)
5289 int8x8_t result;
5290 __asm__ ("cnt %0.8b,%1.8b"
5291 : "=w"(result)
5292 : "w"(a)
5293 : /* No clobbers */);
5294 return result;
5297 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5298 vcnt_u8 (uint8x8_t a)
5300 uint8x8_t result;
5301 __asm__ ("cnt %0.8b,%1.8b"
5302 : "=w"(result)
5303 : "w"(a)
5304 : /* No clobbers */);
5305 return result;
5308 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5309 vcntq_p8 (poly8x16_t a)
5311 poly8x16_t result;
5312 __asm__ ("cnt %0.16b,%1.16b"
5313 : "=w"(result)
5314 : "w"(a)
5315 : /* No clobbers */);
5316 return result;
5319 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5320 vcntq_s8 (int8x16_t a)
5322 int8x16_t result;
5323 __asm__ ("cnt %0.16b,%1.16b"
5324 : "=w"(result)
5325 : "w"(a)
5326 : /* No clobbers */);
5327 return result;
5330 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5331 vcntq_u8 (uint8x16_t a)
5333 uint8x16_t result;
5334 __asm__ ("cnt %0.16b,%1.16b"
5335 : "=w"(result)
5336 : "w"(a)
5337 : /* No clobbers */);
5338 return result;
/* vcopyq_lane_<t> (a, b, c, d): insert lane d of vector c into lane b of
   vector a (INS).  These must be macros because b and d are immediates.
   The statement-expression locals use implementation-reserved
   (double-underscore) names so user-defined macros cannot clash with
   them when this system header is included.  */
#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __c_ = (c);                                          \
       float32x4_t __a_ = (a);                                          \
       float32x4_t __result;                                            \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __c_ = (c);                                          \
       float64x2_t __a_ = (a);                                          \
       float64x2_t __result;                                            \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t __c_ = (c);                                           \
       poly8x16_t __a_ = (a);                                           \
       poly8x16_t __result;                                             \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t __c_ = (c);                                           \
       poly16x8_t __a_ = (a);                                           \
       poly16x8_t __result;                                             \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t __c_ = (c);                                            \
       int8x16_t __a_ = (a);                                            \
       int8x16_t __result;                                              \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (c);                                            \
       int16x8_t __a_ = (a);                                            \
       int16x8_t __result;                                              \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (c);                                            \
       int32x4_t __a_ = (a);                                            \
       int32x4_t __result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __c_ = (c);                                            \
       int64x2_t __a_ = (a);                                            \
       int64x2_t __result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t __c_ = (c);                                           \
       uint8x16_t __a_ = (a);                                           \
       uint8x16_t __result;                                             \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c_ = (c);                                           \
       uint16x8_t __a_ = (a);                                           \
       uint16x8_t __result;                                             \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c_ = (c);                                           \
       uint32x4_t __a_ = (a);                                           \
       uint32x4_t __result;                                             \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __c_ = (c);                                           \
       uint64x2_t __a_ = (a);                                           \
       uint64x2_t __result;                                             \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a_), "i"(b), "w"(__c_), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
5497 /* vcvt_f16_f32 not supported */
5499 /* vcvt_f32_f16 not supported */
5501 /* vcvt_high_f16_f32 not supported */
5503 /* vcvt_high_f32_f16 not supported */
5505 static float32x2_t vdup_n_f32 (float32_t);
/* Fixed-point <-> float conversions on 64-bit vectors with b fractional
   bits (SCVTF/UCVTF/FCVTZS/FCVTZU immediate forms).  Macros because b
   must be an immediate.  Statement-expression locals use
   implementation-reserved (double-underscore) names for macro-collision
   safety in this system header.  */
#define vcvt_n_f32_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __a_ = (a);                                            \
       float32x2_t __result;                                            \
       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_f32_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __a_ = (a);                                           \
       float32x2_t __result;                                            \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_s32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __a_ = (a);                                          \
       int32x2_t __result;                                              \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_u32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __a_ = (a);                                          \
       uint32x2_t __result;                                             \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* Scalar fixed-point <-> double conversions with b fractional bits.
   BUG FIX: the original declared `result` with the SOURCE type instead
   of the destination type (e.g. int64_t for vcvtd_n_f64_s64), so the
   statement expression yielded the wrong type and the converted value's
   register bits were numerically re-converted at the use site.  The
   result now carries the destination type, matching the instruction's
   output.  Locals use implementation-reserved names for macro-collision
   safety.  */
#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t __a_ = (a);                                              \
       float64_t __result;                                              \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t __a_ = (a);                                             \
       float64_t __result;                                              \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t __a_ = (a);                                            \
       int64_t __result;                                                \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t __a_ = (a);                                            \
       uint64_t __result;                                               \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* Fixed-point <-> float conversions on 128-bit vectors with b
   fractional bits (SCVTF/UCVTF/FCVTZS/FCVTZU immediate forms).  Macros
   because b must be an immediate.  Statement-expression locals use
   implementation-reserved (double-underscore) names for macro-collision
   safety in this system header.  */
#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __a_ = (a);                                            \
       float32x4_t __result;                                            \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __a_ = (a);                                           \
       float32x4_t __result;                                            \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __a_ = (a);                                            \
       float64x2_t __result;                                            \
       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __a_ = (a);                                           \
       float64x2_t __result;                                            \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __a_ = (a);                                          \
       int32x4_t __result;                                              \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __a_ = (a);                                          \
       int64x2_t __result;                                              \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __a_ = (a);                                          \
       uint32x4_t __result;                                             \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __a_ = (a);                                          \
       uint64x2_t __result;                                             \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a_), "i"(b)                                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vcvts_n_f32_s32 (a, b): convert the signed 32-bit fixed-point scalar A,
   with B fraction bits (immediate), to single precision via SCVTF.
   NOTE: `result` must have the *destination* type (float32_t) — it is the
   value yielded by the statement expression.  */
#define vcvts_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32_t a_ = (a);                                                \
       float32_t result;                                                \
       __asm__ ("scvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvts_n_f32_u32 (a, b): convert the unsigned 32-bit fixed-point scalar A,
   with B fraction bits (immediate), to single precision via UCVTF.
   NOTE: `result` must have the *destination* type (float32_t) — it is the
   value yielded by the statement expression.  */
#define vcvts_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32_t a_ = (a);                                               \
       float32_t result;                                                \
       __asm__ ("ucvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvts_n_s32_f32 (a, b): convert the single-precision scalar A to a signed
   32-bit fixed-point value with B fraction bits (immediate) via FCVTZS.
   NOTE: `result` must have the *destination* type (int32_t) — it is the
   value yielded by the statement expression.  */
#define vcvts_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       int32_t result;                                                  \
       __asm__ ("fcvtzs %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvts_n_u32_f32 (a, b): convert the single-precision scalar A to an
   unsigned 32-bit fixed-point value with B fraction bits (immediate) via
   FCVTZU.
   NOTE: `result` must have the *destination* type (uint32_t) — it is the
   value yielded by the statement expression.  */
#define vcvts_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       uint32_t result;                                                 \
       __asm__ ("fcvtzu %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvtx_* family: double -> single precision narrowing conversions using
   the round-to-odd FCVTXN forms.  vcvtx_f32_f64 narrows a 2xf64 vector to
   2xf32; vcvtx_high_f32_f64 narrows B into the upper half of the result
   while A (tied to the output via "0") supplies the lower half;
   vcvtxd_f32_f64 is the scalar form.  */
5747 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5748 vcvtx_f32_f64 (float64x2_t a)
5750 float32x2_t result;
5751 __asm__ ("fcvtxn %0.2s,%1.2d"
5752 : "=w"(result)
5753 : "w"(a)
5754 : /* No clobbers */);
5755 return result;
5758 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5759 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5761 float32x4_t result;
5762 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5763 : "=w"(result)
5764 : "w" (b), "0"(a)
5765 : /* No clobbers */);
5766 return result;
5769 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5770 vcvtxd_f32_f64 (float64_t a)
5772 float32_t result;
5773 __asm__ ("fcvtxn %s0,%d1"
5774 : "=w"(result)
5775 : "w"(a)
5776 : /* No clobbers */);
5777 return result;
/* vext_*/vextq_* family: extract a vector from a pair of vectors, i.e.
   take the bytes of A starting at element index C followed by the leading
   bytes of B (EXT instruction).  The EXT immediate is a *byte* offset, so
   the lane index C is scaled by the element size in the asm template
   (#%3*2 for 16-bit, #%3*4 for 32-bit, #%3*8 for 64-bit elements); the
   8-bit-element forms pass the index through unscaled.  D-register forms
   use .8b operands, Q-register forms use .16b.  */
5780 #define vext_f32(a, b, c) \
5781 __extension__ \
5782 ({ \
5783 float32x2_t b_ = (b); \
5784 float32x2_t a_ = (a); \
5785 float32x2_t result; \
5786 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5787 : "=w"(result) \
5788 : "w"(a_), "w"(b_), "i"(c) \
5789 : /* No clobbers */); \
5790 result; \
5793 #define vext_f64(a, b, c) \
5794 __extension__ \
5795 ({ \
5796 float64x1_t b_ = (b); \
5797 float64x1_t a_ = (a); \
5798 float64x1_t result; \
5799 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5800 : "=w"(result) \
5801 : "w"(a_), "w"(b_), "i"(c) \
5802 : /* No clobbers */); \
5803 result; \
5806 #define vext_p8(a, b, c) \
5807 __extension__ \
5808 ({ \
5809 poly8x8_t b_ = (b); \
5810 poly8x8_t a_ = (a); \
5811 poly8x8_t result; \
5812 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5813 : "=w"(result) \
5814 : "w"(a_), "w"(b_), "i"(c) \
5815 : /* No clobbers */); \
5816 result; \
5819 #define vext_p16(a, b, c) \
5820 __extension__ \
5821 ({ \
5822 poly16x4_t b_ = (b); \
5823 poly16x4_t a_ = (a); \
5824 poly16x4_t result; \
5825 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5826 : "=w"(result) \
5827 : "w"(a_), "w"(b_), "i"(c) \
5828 : /* No clobbers */); \
5829 result; \
5832 #define vext_s8(a, b, c) \
5833 __extension__ \
5834 ({ \
5835 int8x8_t b_ = (b); \
5836 int8x8_t a_ = (a); \
5837 int8x8_t result; \
5838 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5839 : "=w"(result) \
5840 : "w"(a_), "w"(b_), "i"(c) \
5841 : /* No clobbers */); \
5842 result; \
5845 #define vext_s16(a, b, c) \
5846 __extension__ \
5847 ({ \
5848 int16x4_t b_ = (b); \
5849 int16x4_t a_ = (a); \
5850 int16x4_t result; \
5851 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5852 : "=w"(result) \
5853 : "w"(a_), "w"(b_), "i"(c) \
5854 : /* No clobbers */); \
5855 result; \
5858 #define vext_s32(a, b, c) \
5859 __extension__ \
5860 ({ \
5861 int32x2_t b_ = (b); \
5862 int32x2_t a_ = (a); \
5863 int32x2_t result; \
5864 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5865 : "=w"(result) \
5866 : "w"(a_), "w"(b_), "i"(c) \
5867 : /* No clobbers */); \
5868 result; \
5871 #define vext_s64(a, b, c) \
5872 __extension__ \
5873 ({ \
5874 int64x1_t b_ = (b); \
5875 int64x1_t a_ = (a); \
5876 int64x1_t result; \
5877 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5878 : "=w"(result) \
5879 : "w"(a_), "w"(b_), "i"(c) \
5880 : /* No clobbers */); \
5881 result; \
5884 #define vext_u8(a, b, c) \
5885 __extension__ \
5886 ({ \
5887 uint8x8_t b_ = (b); \
5888 uint8x8_t a_ = (a); \
5889 uint8x8_t result; \
5890 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5891 : "=w"(result) \
5892 : "w"(a_), "w"(b_), "i"(c) \
5893 : /* No clobbers */); \
5894 result; \
5897 #define vext_u16(a, b, c) \
5898 __extension__ \
5899 ({ \
5900 uint16x4_t b_ = (b); \
5901 uint16x4_t a_ = (a); \
5902 uint16x4_t result; \
5903 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5904 : "=w"(result) \
5905 : "w"(a_), "w"(b_), "i"(c) \
5906 : /* No clobbers */); \
5907 result; \
5910 #define vext_u32(a, b, c) \
5911 __extension__ \
5912 ({ \
5913 uint32x2_t b_ = (b); \
5914 uint32x2_t a_ = (a); \
5915 uint32x2_t result; \
5916 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5917 : "=w"(result) \
5918 : "w"(a_), "w"(b_), "i"(c) \
5919 : /* No clobbers */); \
5920 result; \
5923 #define vext_u64(a, b, c) \
5924 __extension__ \
5925 ({ \
5926 uint64x1_t b_ = (b); \
5927 uint64x1_t a_ = (a); \
5928 uint64x1_t result; \
5929 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5930 : "=w"(result) \
5931 : "w"(a_), "w"(b_), "i"(c) \
5932 : /* No clobbers */); \
5933 result; \
5936 #define vextq_f32(a, b, c) \
5937 __extension__ \
5938 ({ \
5939 float32x4_t b_ = (b); \
5940 float32x4_t a_ = (a); \
5941 float32x4_t result; \
5942 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
5943 : "=w"(result) \
5944 : "w"(a_), "w"(b_), "i"(c) \
5945 : /* No clobbers */); \
5946 result; \
5949 #define vextq_f64(a, b, c) \
5950 __extension__ \
5951 ({ \
5952 float64x2_t b_ = (b); \
5953 float64x2_t a_ = (a); \
5954 float64x2_t result; \
5955 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
5956 : "=w"(result) \
5957 : "w"(a_), "w"(b_), "i"(c) \
5958 : /* No clobbers */); \
5959 result; \
5962 #define vextq_p8(a, b, c) \
5963 __extension__ \
5964 ({ \
5965 poly8x16_t b_ = (b); \
5966 poly8x16_t a_ = (a); \
5967 poly8x16_t result; \
5968 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5969 : "=w"(result) \
5970 : "w"(a_), "w"(b_), "i"(c) \
5971 : /* No clobbers */); \
5972 result; \
5975 #define vextq_p16(a, b, c) \
5976 __extension__ \
5977 ({ \
5978 poly16x8_t b_ = (b); \
5979 poly16x8_t a_ = (a); \
5980 poly16x8_t result; \
5981 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
5982 : "=w"(result) \
5983 : "w"(a_), "w"(b_), "i"(c) \
5984 : /* No clobbers */); \
5985 result; \
5988 #define vextq_s8(a, b, c) \
5989 __extension__ \
5990 ({ \
5991 int8x16_t b_ = (b); \
5992 int8x16_t a_ = (a); \
5993 int8x16_t result; \
5994 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5995 : "=w"(result) \
5996 : "w"(a_), "w"(b_), "i"(c) \
5997 : /* No clobbers */); \
5998 result; \
6001 #define vextq_s16(a, b, c) \
6002 __extension__ \
6003 ({ \
6004 int16x8_t b_ = (b); \
6005 int16x8_t a_ = (a); \
6006 int16x8_t result; \
6007 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
6008 : "=w"(result) \
6009 : "w"(a_), "w"(b_), "i"(c) \
6010 : /* No clobbers */); \
6011 result; \
6014 #define vextq_s32(a, b, c) \
6015 __extension__ \
6016 ({ \
6017 int32x4_t b_ = (b); \
6018 int32x4_t a_ = (a); \
6019 int32x4_t result; \
6020 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
6021 : "=w"(result) \
6022 : "w"(a_), "w"(b_), "i"(c) \
6023 : /* No clobbers */); \
6024 result; \
6027 #define vextq_s64(a, b, c) \
6028 __extension__ \
6029 ({ \
6030 int64x2_t b_ = (b); \
6031 int64x2_t a_ = (a); \
6032 int64x2_t result; \
6033 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
6034 : "=w"(result) \
6035 : "w"(a_), "w"(b_), "i"(c) \
6036 : /* No clobbers */); \
6037 result; \
6040 #define vextq_u8(a, b, c) \
6041 __extension__ \
6042 ({ \
6043 uint8x16_t b_ = (b); \
6044 uint8x16_t a_ = (a); \
6045 uint8x16_t result; \
6046 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
6047 : "=w"(result) \
6048 : "w"(a_), "w"(b_), "i"(c) \
6049 : /* No clobbers */); \
6050 result; \
6053 #define vextq_u16(a, b, c) \
6054 __extension__ \
6055 ({ \
6056 uint16x8_t b_ = (b); \
6057 uint16x8_t a_ = (a); \
6058 uint16x8_t result; \
6059 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
6060 : "=w"(result) \
6061 : "w"(a_), "w"(b_), "i"(c) \
6062 : /* No clobbers */); \
6063 result; \
6066 #define vextq_u32(a, b, c) \
6067 __extension__ \
6068 ({ \
6069 uint32x4_t b_ = (b); \
6070 uint32x4_t a_ = (a); \
6071 uint32x4_t result; \
6072 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
6073 : "=w"(result) \
6074 : "w"(a_), "w"(b_), "i"(c) \
6075 : /* No clobbers */); \
6076 result; \
6079 #define vextq_u64(a, b, c) \
6080 __extension__ \
6081 ({ \
6082 uint64x2_t b_ = (b); \
6083 uint64x2_t a_ = (a); \
6084 uint64x2_t result; \
6085 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
6086 : "=w"(result) \
6087 : "w"(a_), "w"(b_), "i"(c) \
6088 : /* No clobbers */); \
6089 result; \
/* Fused multiply-accumulate family (FMLA/FMLS).  The vector forms
   (vfma_f32, vfmaq_f32, vfmaq_f64, vfms_f32, vfmsq_f32, vfmsq_f64 and the
   _lane/_n vector variants) tie the addend A to the output register via
   the "0" constraint, as FMLA/FMLS accumulate into their destination.
   NOTE(review): the scalar lane variants (vfmad_lane_f64, vfmas_lane_f32,
   vfmsd_lane_f64, vfmss_lane_f32) use "=w"(result) with A only as an
   input ("w"(a_)) and never tie the accumulator to %0, so the destination
   register is uninitialized when the accumulating instruction executes —
   this looks wrong; confirm against the builtin-based implementations
   before relying on them.  */
6092 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6093 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6095 float32x2_t result;
6096 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
6097 : "=w"(result)
6098 : "0"(a), "w"(b), "w"(c)
6099 : /* No clobbers */);
6100 return result;
6103 #define vfma_lane_f32(a, b, c, d) \
6104 __extension__ \
6105 ({ \
6106 float32x2_t c_ = (c); \
6107 float32x2_t b_ = (b); \
6108 float32x2_t a_ = (a); \
6109 float32x2_t result; \
6110 __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \
6111 : "=w"(result) \
6112 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6113 : /* No clobbers */); \
6114 result; \
6117 #define vfmad_lane_f64(a, b, c) \
6118 __extension__ \
6119 ({ \
6120 float64x2_t b_ = (b); \
6121 float64_t a_ = (a); \
6122 float64_t result; \
6123 __asm__ ("fmla %d0,%d1,%2.d[%3]" \
6124 : "=w"(result) \
6125 : "w"(a_), "w"(b_), "i"(c) \
6126 : /* No clobbers */); \
6127 result; \
6130 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6131 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6133 float32x4_t result;
6134 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
6135 : "=w"(result)
6136 : "0"(a), "w"(b), "w"(c)
6137 : /* No clobbers */);
6138 return result;
6141 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6142 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6144 float64x2_t result;
6145 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
6146 : "=w"(result)
6147 : "0"(a), "w"(b), "w"(c)
6148 : /* No clobbers */);
6149 return result;
6152 #define vfmaq_lane_f32(a, b, c, d) \
6153 __extension__ \
6154 ({ \
6155 float32x4_t c_ = (c); \
6156 float32x4_t b_ = (b); \
6157 float32x4_t a_ = (a); \
6158 float32x4_t result; \
6159 __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \
6160 : "=w"(result) \
6161 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6162 : /* No clobbers */); \
6163 result; \
6166 #define vfmaq_lane_f64(a, b, c, d) \
6167 __extension__ \
6168 ({ \
6169 float64x2_t c_ = (c); \
6170 float64x2_t b_ = (b); \
6171 float64x2_t a_ = (a); \
6172 float64x2_t result; \
6173 __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \
6174 : "=w"(result) \
6175 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6176 : /* No clobbers */); \
6177 result; \
6180 #define vfmas_lane_f32(a, b, c) \
6181 __extension__ \
6182 ({ \
6183 float32x4_t b_ = (b); \
6184 float32_t a_ = (a); \
6185 float32_t result; \
6186 __asm__ ("fmla %s0,%s1,%2.s[%3]" \
6187 : "=w"(result) \
6188 : "w"(a_), "w"(b_), "i"(c) \
6189 : /* No clobbers */); \
6190 result; \
6193 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6194 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6196 float32x2_t result;
6197 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
6198 : "=w"(result)
6199 : "0"(a), "w"(b), "w"(c)
6200 : /* No clobbers */);
6201 return result;
6204 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6205 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6207 float32x4_t result;
6208 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
6209 : "=w"(result)
6210 : "0"(a), "w"(b), "w"(c)
6211 : /* No clobbers */);
6212 return result;
6215 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6216 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
6218 float64x2_t result;
6219 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
6220 : "=w"(result)
6221 : "0"(a), "w"(b), "w"(c)
6222 : /* No clobbers */);
6223 return result;
6226 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6227 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6229 float32x2_t result;
6230 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
6231 : "=w"(result)
6232 : "0"(a), "w"(b), "w"(c)
6233 : /* No clobbers */);
6234 return result;
6237 #define vfmsd_lane_f64(a, b, c) \
6238 __extension__ \
6239 ({ \
6240 float64x2_t b_ = (b); \
6241 float64_t a_ = (a); \
6242 float64_t result; \
6243 __asm__ ("fmls %d0,%d1,%2.d[%3]" \
6244 : "=w"(result) \
6245 : "w"(a_), "w"(b_), "i"(c) \
6246 : /* No clobbers */); \
6247 result; \
6250 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6251 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6253 float32x4_t result;
6254 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
6255 : "=w"(result)
6256 : "0"(a), "w"(b), "w"(c)
6257 : /* No clobbers */);
6258 return result;
6261 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6262 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6264 float64x2_t result;
6265 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
6266 : "=w"(result)
6267 : "0"(a), "w"(b), "w"(c)
6268 : /* No clobbers */);
6269 return result;
6272 #define vfmss_lane_f32(a, b, c) \
6273 __extension__ \
6274 ({ \
6275 float32x4_t b_ = (b); \
6276 float32_t a_ = (a); \
6277 float32_t result; \
6278 __asm__ ("fmls %s0,%s1,%2.s[%3]" \
6279 : "=w"(result) \
6280 : "w"(a_), "w"(b_), "i"(c) \
6281 : /* No clobbers */); \
6282 result; \
/* vget_high_* family: return the upper 64 bits of a 128-bit vector as a
   64-bit vector.  Implemented as INS %0.d[0], %1.d[1] — copy the high
   doubleword of the source into the low doubleword of the result; only
   the low 64 bits of the destination register are meaningful for the
   D-register return type.  */
6285 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6286 vget_high_f32 (float32x4_t a)
6288 float32x2_t result;
6289 __asm__ ("ins %0.d[0], %1.d[1]"
6290 : "=w"(result)
6291 : "w"(a)
6292 : /* No clobbers */);
6293 return result;
6296 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6297 vget_high_f64 (float64x2_t a)
6299 float64x1_t result;
6300 __asm__ ("ins %0.d[0], %1.d[1]"
6301 : "=w"(result)
6302 : "w"(a)
6303 : /* No clobbers */);
6304 return result;
6307 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6308 vget_high_p8 (poly8x16_t a)
6310 poly8x8_t result;
6311 __asm__ ("ins %0.d[0], %1.d[1]"
6312 : "=w"(result)
6313 : "w"(a)
6314 : /* No clobbers */);
6315 return result;
6318 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6319 vget_high_p16 (poly16x8_t a)
6321 poly16x4_t result;
6322 __asm__ ("ins %0.d[0], %1.d[1]"
6323 : "=w"(result)
6324 : "w"(a)
6325 : /* No clobbers */);
6326 return result;
6329 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6330 vget_high_s8 (int8x16_t a)
6332 int8x8_t result;
6333 __asm__ ("ins %0.d[0], %1.d[1]"
6334 : "=w"(result)
6335 : "w"(a)
6336 : /* No clobbers */);
6337 return result;
6340 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6341 vget_high_s16 (int16x8_t a)
6343 int16x4_t result;
6344 __asm__ ("ins %0.d[0], %1.d[1]"
6345 : "=w"(result)
6346 : "w"(a)
6347 : /* No clobbers */);
6348 return result;
6351 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6352 vget_high_s32 (int32x4_t a)
6354 int32x2_t result;
6355 __asm__ ("ins %0.d[0], %1.d[1]"
6356 : "=w"(result)
6357 : "w"(a)
6358 : /* No clobbers */);
6359 return result;
6362 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6363 vget_high_s64 (int64x2_t a)
6365 int64x1_t result;
6366 __asm__ ("ins %0.d[0], %1.d[1]"
6367 : "=w"(result)
6368 : "w"(a)
6369 : /* No clobbers */);
6370 return result;
6373 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6374 vget_high_u8 (uint8x16_t a)
6376 uint8x8_t result;
6377 __asm__ ("ins %0.d[0], %1.d[1]"
6378 : "=w"(result)
6379 : "w"(a)
6380 : /* No clobbers */);
6381 return result;
6384 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6385 vget_high_u16 (uint16x8_t a)
6387 uint16x4_t result;
6388 __asm__ ("ins %0.d[0], %1.d[1]"
6389 : "=w"(result)
6390 : "w"(a)
6391 : /* No clobbers */);
6392 return result;
6395 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6396 vget_high_u32 (uint32x4_t a)
6398 uint32x2_t result;
6399 __asm__ ("ins %0.d[0], %1.d[1]"
6400 : "=w"(result)
6401 : "w"(a)
6402 : /* No clobbers */);
6403 return result;
6406 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6407 vget_high_u64 (uint64x2_t a)
6409 uint64x1_t result;
6410 __asm__ ("ins %0.d[0], %1.d[1]"
6411 : "=w"(result)
6412 : "w"(a)
6413 : /* No clobbers */);
6414 return result;
/* vhsub*/vhsubq* family: halving subtract, (a - b) >> 1 per element
   without intermediate overflow (SHSUB for signed, UHSUB for unsigned).
   D-register and Q-register forms for 8/16/32-bit elements.  */
6417 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6418 vhsub_s8 (int8x8_t a, int8x8_t b)
6420 int8x8_t result;
6421 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
6422 : "=w"(result)
6423 : "w"(a), "w"(b)
6424 : /* No clobbers */);
6425 return result;
6428 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6429 vhsub_s16 (int16x4_t a, int16x4_t b)
6431 int16x4_t result;
6432 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
6433 : "=w"(result)
6434 : "w"(a), "w"(b)
6435 : /* No clobbers */);
6436 return result;
6439 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6440 vhsub_s32 (int32x2_t a, int32x2_t b)
6442 int32x2_t result;
6443 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
6444 : "=w"(result)
6445 : "w"(a), "w"(b)
6446 : /* No clobbers */);
6447 return result;
6450 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6451 vhsub_u8 (uint8x8_t a, uint8x8_t b)
6453 uint8x8_t result;
6454 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
6455 : "=w"(result)
6456 : "w"(a), "w"(b)
6457 : /* No clobbers */);
6458 return result;
6461 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6462 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6464 uint16x4_t result;
6465 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6466 : "=w"(result)
6467 : "w"(a), "w"(b)
6468 : /* No clobbers */);
6469 return result;
6472 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6473 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6475 uint32x2_t result;
6476 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6477 : "=w"(result)
6478 : "w"(a), "w"(b)
6479 : /* No clobbers */);
6480 return result;
6483 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6484 vhsubq_s8 (int8x16_t a, int8x16_t b)
6486 int8x16_t result;
6487 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6488 : "=w"(result)
6489 : "w"(a), "w"(b)
6490 : /* No clobbers */);
6491 return result;
6494 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6495 vhsubq_s16 (int16x8_t a, int16x8_t b)
6497 int16x8_t result;
6498 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6499 : "=w"(result)
6500 : "w"(a), "w"(b)
6501 : /* No clobbers */);
6502 return result;
6505 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6506 vhsubq_s32 (int32x4_t a, int32x4_t b)
6508 int32x4_t result;
6509 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6510 : "=w"(result)
6511 : "w"(a), "w"(b)
6512 : /* No clobbers */);
6513 return result;
6516 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6517 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6519 uint8x16_t result;
6520 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6521 : "=w"(result)
6522 : "w"(a), "w"(b)
6523 : /* No clobbers */);
6524 return result;
6527 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6528 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6530 uint16x8_t result;
6531 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6532 : "=w"(result)
6533 : "w"(a), "w"(b)
6534 : /* No clobbers */);
6535 return result;
6538 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6539 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6541 uint32x4_t result;
6542 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6543 : "=w"(result)
6544 : "w"(a), "w"(b)
6545 : /* No clobbers */);
6546 return result;
/* vld1_dup_* family (D-register forms): load one element from memory and
   replicate it to every lane of a 64-bit vector (LD1R).  The "Utv"
   memory constraint addresses the scalar *a directly; 64-bit element
   types use the {%0.1d} single-lane form.  */
6549 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6550 vld1_dup_f32 (const float32_t * a)
6552 float32x2_t result;
6553 __asm__ ("ld1r {%0.2s}, %1"
6554 : "=w"(result)
6555 : "Utv"(*a)
6556 : /* No clobbers */);
6557 return result;
6560 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6561 vld1_dup_f64 (const float64_t * a)
6563 float64x1_t result;
6564 __asm__ ("ld1r {%0.1d}, %1"
6565 : "=w"(result)
6566 : "Utv"(*a)
6567 : /* No clobbers */);
6568 return result;
6571 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6572 vld1_dup_p8 (const poly8_t * a)
6574 poly8x8_t result;
6575 __asm__ ("ld1r {%0.8b}, %1"
6576 : "=w"(result)
6577 : "Utv"(*a)
6578 : /* No clobbers */);
6579 return result;
6582 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6583 vld1_dup_p16 (const poly16_t * a)
6585 poly16x4_t result;
6586 __asm__ ("ld1r {%0.4h}, %1"
6587 : "=w"(result)
6588 : "Utv"(*a)
6589 : /* No clobbers */);
6590 return result;
6593 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6594 vld1_dup_s8 (const int8_t * a)
6596 int8x8_t result;
6597 __asm__ ("ld1r {%0.8b}, %1"
6598 : "=w"(result)
6599 : "Utv"(*a)
6600 : /* No clobbers */);
6601 return result;
6604 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6605 vld1_dup_s16 (const int16_t * a)
6607 int16x4_t result;
6608 __asm__ ("ld1r {%0.4h}, %1"
6609 : "=w"(result)
6610 : "Utv"(*a)
6611 : /* No clobbers */);
6612 return result;
6615 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6616 vld1_dup_s32 (const int32_t * a)
6618 int32x2_t result;
6619 __asm__ ("ld1r {%0.2s}, %1"
6620 : "=w"(result)
6621 : "Utv"(*a)
6622 : /* No clobbers */);
6623 return result;
6626 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6627 vld1_dup_s64 (const int64_t * a)
6629 int64x1_t result;
6630 __asm__ ("ld1r {%0.1d}, %1"
6631 : "=w"(result)
6632 : "Utv"(*a)
6633 : /* No clobbers */);
6634 return result;
6637 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6638 vld1_dup_u8 (const uint8_t * a)
6640 uint8x8_t result;
6641 __asm__ ("ld1r {%0.8b}, %1"
6642 : "=w"(result)
6643 : "Utv"(*a)
6644 : /* No clobbers */);
6645 return result;
6648 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6649 vld1_dup_u16 (const uint16_t * a)
6651 uint16x4_t result;
6652 __asm__ ("ld1r {%0.4h}, %1"
6653 : "=w"(result)
6654 : "Utv"(*a)
6655 : /* No clobbers */);
6656 return result;
6659 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6660 vld1_dup_u32 (const uint32_t * a)
6662 uint32x2_t result;
6663 __asm__ ("ld1r {%0.2s}, %1"
6664 : "=w"(result)
6665 : "Utv"(*a)
6666 : /* No clobbers */);
6667 return result;
6670 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6671 vld1_dup_u64 (const uint64_t * a)
6673 uint64x1_t result;
6674 __asm__ ("ld1r {%0.1d}, %1"
6675 : "=w"(result)
6676 : "Utv"(*a)
6677 : /* No clobbers */);
6678 return result;
/* vld1_lane_* family (D-register forms): load a single element from *A
   into lane C of vector B, leaving the other lanes unchanged.  B is tied
   to the output via the "0" constraint so the untouched lanes come from
   B; C must be a constant lane index ("i" constraint).  */
6681 #define vld1_lane_f32(a, b, c) \
6682 __extension__ \
6683 ({ \
6684 float32x2_t b_ = (b); \
6685 const float32_t * a_ = (a); \
6686 float32x2_t result; \
6687 __asm__ ("ld1 {%0.s}[%1], %2" \
6688 : "=w"(result) \
6689 : "i" (c), "Utv"(*a_), "0"(b_) \
6690 : /* No clobbers */); \
6691 result; \
6694 #define vld1_lane_f64(a, b, c) \
6695 __extension__ \
6696 ({ \
6697 float64x1_t b_ = (b); \
6698 const float64_t * a_ = (a); \
6699 float64x1_t result; \
6700 __asm__ ("ld1 {%0.d}[%1], %2" \
6701 : "=w"(result) \
6702 : "i" (c), "Utv"(*a_), "0"(b_) \
6703 : /* No clobbers */); \
6704 result; \
6707 #define vld1_lane_p8(a, b, c) \
6708 __extension__ \
6709 ({ \
6710 poly8x8_t b_ = (b); \
6711 const poly8_t * a_ = (a); \
6712 poly8x8_t result; \
6713 __asm__ ("ld1 {%0.b}[%1], %2" \
6714 : "=w"(result) \
6715 : "i" (c), "Utv"(*a_), "0"(b_) \
6716 : /* No clobbers */); \
6717 result; \
6720 #define vld1_lane_p16(a, b, c) \
6721 __extension__ \
6722 ({ \
6723 poly16x4_t b_ = (b); \
6724 const poly16_t * a_ = (a); \
6725 poly16x4_t result; \
6726 __asm__ ("ld1 {%0.h}[%1], %2" \
6727 : "=w"(result) \
6728 : "i" (c), "Utv"(*a_), "0"(b_) \
6729 : /* No clobbers */); \
6730 result; \
6733 #define vld1_lane_s8(a, b, c) \
6734 __extension__ \
6735 ({ \
6736 int8x8_t b_ = (b); \
6737 const int8_t * a_ = (a); \
6738 int8x8_t result; \
6739 __asm__ ("ld1 {%0.b}[%1], %2" \
6740 : "=w"(result) \
6741 : "i" (c), "Utv"(*a_), "0"(b_) \
6742 : /* No clobbers */); \
6743 result; \
6746 #define vld1_lane_s16(a, b, c) \
6747 __extension__ \
6748 ({ \
6749 int16x4_t b_ = (b); \
6750 const int16_t * a_ = (a); \
6751 int16x4_t result; \
6752 __asm__ ("ld1 {%0.h}[%1], %2" \
6753 : "=w"(result) \
6754 : "i" (c), "Utv"(*a_), "0"(b_) \
6755 : /* No clobbers */); \
6756 result; \
6759 #define vld1_lane_s32(a, b, c) \
6760 __extension__ \
6761 ({ \
6762 int32x2_t b_ = (b); \
6763 const int32_t * a_ = (a); \
6764 int32x2_t result; \
6765 __asm__ ("ld1 {%0.s}[%1], %2" \
6766 : "=w"(result) \
6767 : "i" (c), "Utv"(*a_), "0"(b_) \
6768 : /* No clobbers */); \
6769 result; \
6772 #define vld1_lane_s64(a, b, c) \
6773 __extension__ \
6774 ({ \
6775 int64x1_t b_ = (b); \
6776 const int64_t * a_ = (a); \
6777 int64x1_t result; \
6778 __asm__ ("ld1 {%0.d}[%1], %2" \
6779 : "=w"(result) \
6780 : "i" (c), "Utv"(*a_), "0"(b_) \
6781 : /* No clobbers */); \
6782 result; \
6785 #define vld1_lane_u8(a, b, c) \
6786 __extension__ \
6787 ({ \
6788 uint8x8_t b_ = (b); \
6789 const uint8_t * a_ = (a); \
6790 uint8x8_t result; \
6791 __asm__ ("ld1 {%0.b}[%1], %2" \
6792 : "=w"(result) \
6793 : "i" (c), "Utv"(*a_), "0"(b_) \
6794 : /* No clobbers */); \
6795 result; \
6798 #define vld1_lane_u16(a, b, c) \
6799 __extension__ \
6800 ({ \
6801 uint16x4_t b_ = (b); \
6802 const uint16_t * a_ = (a); \
6803 uint16x4_t result; \
6804 __asm__ ("ld1 {%0.h}[%1], %2" \
6805 : "=w"(result) \
6806 : "i" (c), "Utv"(*a_), "0"(b_) \
6807 : /* No clobbers */); \
6808 result; \
6811 #define vld1_lane_u32(a, b, c) \
6812 __extension__ \
6813 ({ \
6814 uint32x2_t b_ = (b); \
6815 const uint32_t * a_ = (a); \
6816 uint32x2_t result; \
6817 __asm__ ("ld1 {%0.s}[%1], %2" \
6818 : "=w"(result) \
6819 : "i" (c), "Utv"(*a_), "0"(b_) \
6820 : /* No clobbers */); \
6821 result; \
6824 #define vld1_lane_u64(a, b, c) \
6825 __extension__ \
6826 ({ \
6827 uint64x1_t b_ = (b); \
6828 const uint64_t * a_ = (a); \
6829 uint64x1_t result; \
6830 __asm__ ("ld1 {%0.d}[%1], %2" \
6831 : "=w"(result) \
6832 : "i" (c), "Utv"(*a_), "0"(b_) \
6833 : /* No clobbers */); \
6834 result; \
/* vld1q_dup_* family (Q-register forms): load one element from memory and
   replicate it to every lane of a 128-bit vector (LD1R).  */
6837 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6838 vld1q_dup_f32 (const float32_t * a)
6840 float32x4_t result;
6841 __asm__ ("ld1r {%0.4s}, %1"
6842 : "=w"(result)
6843 : "Utv"(*a)
6844 : /* No clobbers */);
6845 return result;
6848 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6849 vld1q_dup_f64 (const float64_t * a)
6851 float64x2_t result;
6852 __asm__ ("ld1r {%0.2d}, %1"
6853 : "=w"(result)
6854 : "Utv"(*a)
6855 : /* No clobbers */);
6856 return result;
6859 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6860 vld1q_dup_p8 (const poly8_t * a)
6862 poly8x16_t result;
6863 __asm__ ("ld1r {%0.16b}, %1"
6864 : "=w"(result)
6865 : "Utv"(*a)
6866 : /* No clobbers */);
6867 return result;
6870 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6871 vld1q_dup_p16 (const poly16_t * a)
6873 poly16x8_t result;
6874 __asm__ ("ld1r {%0.8h}, %1"
6875 : "=w"(result)
6876 : "Utv"(*a)
6877 : /* No clobbers */);
6878 return result;
6881 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6882 vld1q_dup_s8 (const int8_t * a)
6884 int8x16_t result;
6885 __asm__ ("ld1r {%0.16b}, %1"
6886 : "=w"(result)
6887 : "Utv"(*a)
6888 : /* No clobbers */);
6889 return result;
6892 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6893 vld1q_dup_s16 (const int16_t * a)
6895 int16x8_t result;
6896 __asm__ ("ld1r {%0.8h}, %1"
6897 : "=w"(result)
6898 : "Utv"(*a)
6899 : /* No clobbers */);
6900 return result;
6903 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6904 vld1q_dup_s32 (const int32_t * a)
6906 int32x4_t result;
6907 __asm__ ("ld1r {%0.4s}, %1"
6908 : "=w"(result)
6909 : "Utv"(*a)
6910 : /* No clobbers */);
6911 return result;
6914 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6915 vld1q_dup_s64 (const int64_t * a)
6917 int64x2_t result;
6918 __asm__ ("ld1r {%0.2d}, %1"
6919 : "=w"(result)
6920 : "Utv"(*a)
6921 : /* No clobbers */);
6922 return result;
6925 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6926 vld1q_dup_u8 (const uint8_t * a)
6928 uint8x16_t result;
6929 __asm__ ("ld1r {%0.16b}, %1"
6930 : "=w"(result)
6931 : "Utv"(*a)
6932 : /* No clobbers */);
6933 return result;
6936 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6937 vld1q_dup_u16 (const uint16_t * a)
6939 uint16x8_t result;
6940 __asm__ ("ld1r {%0.8h}, %1"
6941 : "=w"(result)
6942 : "Utv"(*a)
6943 : /* No clobbers */);
6944 return result;
6947 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6948 vld1q_dup_u32 (const uint32_t * a)
6950 uint32x4_t result;
6951 __asm__ ("ld1r {%0.4s}, %1"
6952 : "=w"(result)
6953 : "Utv"(*a)
6954 : /* No clobbers */);
6955 return result;
6958 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6959 vld1q_dup_u64 (const uint64_t * a)
6961 uint64x2_t result;
6962 __asm__ ("ld1r {%0.2d}, %1"
6963 : "=w"(result)
6964 : "Utv"(*a)
6965 : /* No clobbers */);
6966 return result;
/* vld1q_lane_* family (Q-register forms): load a single element from *A
   into lane C of vector B, other lanes preserved via the "0"(b_) tie;
   C must be a constant lane index ("i" constraint).  */
6969 #define vld1q_lane_f32(a, b, c) \
6970 __extension__ \
6971 ({ \
6972 float32x4_t b_ = (b); \
6973 const float32_t * a_ = (a); \
6974 float32x4_t result; \
6975 __asm__ ("ld1 {%0.s}[%1], %2" \
6976 : "=w"(result) \
6977 : "i"(c), "Utv"(*a_), "0"(b_) \
6978 : /* No clobbers */); \
6979 result; \
6982 #define vld1q_lane_f64(a, b, c) \
6983 __extension__ \
6984 ({ \
6985 float64x2_t b_ = (b); \
6986 const float64_t * a_ = (a); \
6987 float64x2_t result; \
6988 __asm__ ("ld1 {%0.d}[%1], %2" \
6989 : "=w"(result) \
6990 : "i"(c), "Utv"(*a_), "0"(b_) \
6991 : /* No clobbers */); \
6992 result; \
6995 #define vld1q_lane_p8(a, b, c) \
6996 __extension__ \
6997 ({ \
6998 poly8x16_t b_ = (b); \
6999 const poly8_t * a_ = (a); \
7000 poly8x16_t result; \
7001 __asm__ ("ld1 {%0.b}[%1], %2" \
7002 : "=w"(result) \
7003 : "i"(c), "Utv"(*a_), "0"(b_) \
7004 : /* No clobbers */); \
7005 result; \
7008 #define vld1q_lane_p16(a, b, c) \
7009 __extension__ \
7010 ({ \
7011 poly16x8_t b_ = (b); \
7012 const poly16_t * a_ = (a); \
7013 poly16x8_t result; \
7014 __asm__ ("ld1 {%0.h}[%1], %2" \
7015 : "=w"(result) \
7016 : "i"(c), "Utv"(*a_), "0"(b_) \
7017 : /* No clobbers */); \
7018 result; \
7021 #define vld1q_lane_s8(a, b, c) \
7022 __extension__ \
7023 ({ \
7024 int8x16_t b_ = (b); \
7025 const int8_t * a_ = (a); \
7026 int8x16_t result; \
7027 __asm__ ("ld1 {%0.b}[%1], %2" \
7028 : "=w"(result) \
7029 : "i"(c), "Utv"(*a_), "0"(b_) \
7030 : /* No clobbers */); \
7031 result; \
7034 #define vld1q_lane_s16(a, b, c) \
7035 __extension__ \
7036 ({ \
7037 int16x8_t b_ = (b); \
7038 const int16_t * a_ = (a); \
7039 int16x8_t result; \
7040 __asm__ ("ld1 {%0.h}[%1], %2" \
7041 : "=w"(result) \
7042 : "i"(c), "Utv"(*a_), "0"(b_) \
7043 : /* No clobbers */); \
7044 result; \
7047 #define vld1q_lane_s32(a, b, c) \
7048 __extension__ \
7049 ({ \
7050 int32x4_t b_ = (b); \
7051 const int32_t * a_ = (a); \
7052 int32x4_t result; \
7053 __asm__ ("ld1 {%0.s}[%1], %2" \
7054 : "=w"(result) \
7055 : "i"(c), "Utv"(*a_), "0"(b_) \
7056 : /* No clobbers */); \
7057 result; \
7060 #define vld1q_lane_s64(a, b, c) \
7061 __extension__ \
7062 ({ \
7063 int64x2_t b_ = (b); \
7064 const int64_t * a_ = (a); \
7065 int64x2_t result; \
7066 __asm__ ("ld1 {%0.d}[%1], %2" \
7067 : "=w"(result) \
7068 : "i"(c), "Utv"(*a_), "0"(b_) \
7069 : /* No clobbers */); \
7070 result; \
7073 #define vld1q_lane_u8(a, b, c) \
7074 __extension__ \
7075 ({ \
7076 uint8x16_t b_ = (b); \
7077 const uint8_t * a_ = (a); \
7078 uint8x16_t result; \
7079 __asm__ ("ld1 {%0.b}[%1], %2" \
7080 : "=w"(result) \
7081 : "i"(c), "Utv"(*a_), "0"(b_) \
7082 : /* No clobbers */); \
7083 result; \
7086 #define vld1q_lane_u16(a, b, c) \
7087 __extension__ \
7088 ({ \
7089 uint16x8_t b_ = (b); \
7090 const uint16_t * a_ = (a); \
7091 uint16x8_t result; \
7092 __asm__ ("ld1 {%0.h}[%1], %2" \
7093 : "=w"(result) \
7094 : "i"(c), "Utv"(*a_), "0"(b_) \
7095 : /* No clobbers */); \
7096 result; \
7099 #define vld1q_lane_u32(a, b, c) \
7100 __extension__ \
7101 ({ \
7102 uint32x4_t b_ = (b); \
7103 const uint32_t * a_ = (a); \
7104 uint32x4_t result; \
7105 __asm__ ("ld1 {%0.s}[%1], %2" \
7106 : "=w"(result) \
7107 : "i"(c), "Utv"(*a_), "0"(b_) \
7108 : /* No clobbers */); \
7109 result; \
7112 #define vld1q_lane_u64(a, b, c) \
7113 __extension__ \
7114 ({ \
7115 uint64x2_t b_ = (b); \
7116 const uint64_t * a_ = (a); \
7117 uint64x2_t result; \
7118 __asm__ ("ld1 {%0.d}[%1], %2" \
7119 : "=w"(result) \
7120 : "i"(c), "Utv"(*a_), "0"(b_) \
7121 : /* No clobbers */); \
7122 result; \
/* vmla_lane / vmla_laneq: a + b * c[d] on 64-bit vectors.  The float
   variant has no single instruction here, so it issues FMUL into a
   scratch register followed by FADD (not fused).  The integer variants
   map to MLA by-element.  The "x" constraint on 16-bit lane operands
   restricts them to V0-V15, as required by the h-lane by-element
   encoding; _laneq variants take a 128-bit lane source c.  */

#define vmla_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x2_t c_ = (c); \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       float32x2_t t1; \
       __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7252 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7253 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7255 float32x2_t result;
7256 float32x2_t t1;
7257 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
7258 : "=w"(result), "=w"(t1)
7259 : "0"(a), "w"(b), "w"(c)
7260 : /* No clobbers */);
7261 return result;
7264 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7265 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7267 int16x4_t result;
7268 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7269 : "=w"(result)
7270 : "0"(a), "w"(b), "x"(c)
7271 : /* No clobbers */);
7272 return result;
7275 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7276 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7278 int32x2_t result;
7279 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7280 : "=w"(result)
7281 : "0"(a), "w"(b), "w"(c)
7282 : /* No clobbers */);
7283 return result;
7286 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7287 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7289 uint16x4_t result;
7290 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7291 : "=w"(result)
7292 : "0"(a), "w"(b), "x"(c)
7293 : /* No clobbers */);
7294 return result;
7297 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7298 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7300 uint32x2_t result;
7301 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7302 : "=w"(result)
7303 : "0"(a), "w"(b), "w"(c)
7304 : /* No clobbers */);
7305 return result;
7308 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7309 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7311 int8x8_t result;
7312 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7313 : "=w"(result)
7314 : "0"(a), "w"(b), "w"(c)
7315 : /* No clobbers */);
7316 return result;
7319 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7320 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7322 int16x4_t result;
7323 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7324 : "=w"(result)
7325 : "0"(a), "w"(b), "w"(c)
7326 : /* No clobbers */);
7327 return result;
7330 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7331 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7333 int32x2_t result;
7334 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7335 : "=w"(result)
7336 : "0"(a), "w"(b), "w"(c)
7337 : /* No clobbers */);
7338 return result;
7341 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7342 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7344 uint8x8_t result;
7345 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7346 : "=w"(result)
7347 : "0"(a), "w"(b), "w"(c)
7348 : /* No clobbers */);
7349 return result;
7352 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7353 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7355 uint16x4_t result;
7356 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7357 : "=w"(result)
7358 : "0"(a), "w"(b), "w"(c)
7359 : /* No clobbers */);
7360 return result;
7363 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7364 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7366 uint32x2_t result;
7367 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7368 : "=w"(result)
7369 : "0"(a), "w"(b), "w"(c)
7370 : /* No clobbers */);
7371 return result;
/* vmlal_high_lane / vmlal_high_laneq: widening multiply-accumulate of
   the HIGH half of b by lane d of c (SMLAL2/UMLAL2 by-element).  Macros
   because the lane index must be an immediate.  */

#define vmlal_high_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7486 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7487 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7489 int32x4_t result;
7490 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
7491 : "=w"(result)
7492 : "0"(a), "w"(b), "x"(c)
7493 : /* No clobbers */);
7494 return result;
7497 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7498 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7500 int64x2_t result;
7501 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
7502 : "=w"(result)
7503 : "0"(a), "w"(b), "w"(c)
7504 : /* No clobbers */);
7505 return result;
7508 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7509 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7511 uint32x4_t result;
7512 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
7513 : "=w"(result)
7514 : "0"(a), "w"(b), "x"(c)
7515 : /* No clobbers */);
7516 return result;
7519 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7520 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7522 uint64x2_t result;
7523 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
7524 : "=w"(result)
7525 : "0"(a), "w"(b), "w"(c)
7526 : /* No clobbers */);
7527 return result;
7530 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7531 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7533 int16x8_t result;
7534 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
7535 : "=w"(result)
7536 : "0"(a), "w"(b), "w"(c)
7537 : /* No clobbers */);
7538 return result;
7541 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7542 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7544 int32x4_t result;
7545 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
7546 : "=w"(result)
7547 : "0"(a), "w"(b), "w"(c)
7548 : /* No clobbers */);
7549 return result;
7552 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7553 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7555 int64x2_t result;
7556 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
7557 : "=w"(result)
7558 : "0"(a), "w"(b), "w"(c)
7559 : /* No clobbers */);
7560 return result;
7563 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7564 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7566 uint16x8_t result;
7567 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
7568 : "=w"(result)
7569 : "0"(a), "w"(b), "w"(c)
7570 : /* No clobbers */);
7571 return result;
7574 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7575 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7577 uint32x4_t result;
7578 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
7579 : "=w"(result)
7580 : "0"(a), "w"(b), "w"(c)
7581 : /* No clobbers */);
7582 return result;
7585 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7586 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7588 uint64x2_t result;
7589 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
7590 : "=w"(result)
7591 : "0"(a), "w"(b), "w"(c)
7592 : /* No clobbers */);
7593 return result;
/* vmlal_lane / vmlal_laneq: widening multiply-accumulate of the low
   64-bit vector b by lane d of c (SMLAL/UMLAL by-element).  _laneq takes
   a 128-bit lane source.  */

#define vmlal_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7708 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7709 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7711 int32x4_t result;
7712 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7713 : "=w"(result)
7714 : "0"(a), "w"(b), "x"(c)
7715 : /* No clobbers */);
7716 return result;
7719 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7720 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7722 int64x2_t result;
7723 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7724 : "=w"(result)
7725 : "0"(a), "w"(b), "w"(c)
7726 : /* No clobbers */);
7727 return result;
7730 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7731 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7733 uint32x4_t result;
7734 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7735 : "=w"(result)
7736 : "0"(a), "w"(b), "x"(c)
7737 : /* No clobbers */);
7738 return result;
7741 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7742 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7744 uint64x2_t result;
7745 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7746 : "=w"(result)
7747 : "0"(a), "w"(b), "w"(c)
7748 : /* No clobbers */);
7749 return result;
7752 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7753 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7755 int16x8_t result;
7756 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7757 : "=w"(result)
7758 : "0"(a), "w"(b), "w"(c)
7759 : /* No clobbers */);
7760 return result;
7763 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7764 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7766 int32x4_t result;
7767 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7768 : "=w"(result)
7769 : "0"(a), "w"(b), "w"(c)
7770 : /* No clobbers */);
7771 return result;
7774 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7775 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7777 int64x2_t result;
7778 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7779 : "=w"(result)
7780 : "0"(a), "w"(b), "w"(c)
7781 : /* No clobbers */);
7782 return result;
7785 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7786 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7788 uint16x8_t result;
7789 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7790 : "=w"(result)
7791 : "0"(a), "w"(b), "w"(c)
7792 : /* No clobbers */);
7793 return result;
7796 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7797 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7799 uint32x4_t result;
7800 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7801 : "=w"(result)
7802 : "0"(a), "w"(b), "w"(c)
7803 : /* No clobbers */);
7804 return result;
7807 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7808 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7810 uint64x2_t result;
7811 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7812 : "=w"(result)
7813 : "0"(a), "w"(b), "w"(c)
7814 : /* No clobbers */);
7815 return result;
/* vmlaq_lane / vmlaq_laneq: a + b * c[d] on 128-bit vectors.  Float
   variant issues FMUL into a scratch then FADD (not fused); integer
   variants map to MLA by-element.  */

#define vmlaq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       float32x4_t t1; \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7945 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7946 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7948 float32x4_t result;
7949 float32x4_t t1;
7950 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7951 : "=w"(result), "=w"(t1)
7952 : "0"(a), "w"(b), "w"(c)
7953 : /* No clobbers */);
7954 return result;
7957 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7958 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7960 float64x2_t result;
7961 float64x2_t t1;
7962 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
7963 : "=w"(result), "=w"(t1)
7964 : "0"(a), "w"(b), "w"(c)
7965 : /* No clobbers */);
7966 return result;
7969 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7970 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7972 int16x8_t result;
7973 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7974 : "=w"(result)
7975 : "0"(a), "w"(b), "x"(c)
7976 : /* No clobbers */);
7977 return result;
7980 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7981 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7983 int32x4_t result;
7984 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7985 : "=w"(result)
7986 : "0"(a), "w"(b), "w"(c)
7987 : /* No clobbers */);
7988 return result;
7991 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7992 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7994 uint16x8_t result;
7995 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7996 : "=w"(result)
7997 : "0"(a), "w"(b), "x"(c)
7998 : /* No clobbers */);
7999 return result;
8002 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8003 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
8005 uint32x4_t result;
8006 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
8007 : "=w"(result)
8008 : "0"(a), "w"(b), "w"(c)
8009 : /* No clobbers */);
8010 return result;
8013 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8014 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8016 int8x16_t result;
8017 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8018 : "=w"(result)
8019 : "0"(a), "w"(b), "w"(c)
8020 : /* No clobbers */);
8021 return result;
8024 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8025 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8027 int16x8_t result;
8028 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8029 : "=w"(result)
8030 : "0"(a), "w"(b), "w"(c)
8031 : /* No clobbers */);
8032 return result;
8035 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8036 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8038 int32x4_t result;
8039 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8040 : "=w"(result)
8041 : "0"(a), "w"(b), "w"(c)
8042 : /* No clobbers */);
8043 return result;
8046 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8047 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8049 uint8x16_t result;
8050 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8051 : "=w"(result)
8052 : "0"(a), "w"(b), "w"(c)
8053 : /* No clobbers */);
8054 return result;
8057 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8058 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8060 uint16x8_t result;
8061 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8062 : "=w"(result)
8063 : "0"(a), "w"(b), "w"(c)
8064 : /* No clobbers */);
8065 return result;
8068 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8069 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8071 uint32x4_t result;
8072 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8073 : "=w"(result)
8074 : "0"(a), "w"(b), "w"(c)
8075 : /* No clobbers */);
8076 return result;
/* vmls_lane: a - b * c[d] on 64-bit vectors.  Float variant issues FMUL
   into a scratch then FSUB (not fused); integer variants map to MLS
   by-element.  */

#define vmls_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x2_t c_ = (c); \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       float32x2_t t1; \
       __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmls_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmls_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmls_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmls_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
8150 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8151 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
8153 float32x2_t result;
8154 float32x2_t t1;
8155 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
8156 : "=w"(result), "=w"(t1)
8157 : "0"(a), "w"(b), "w"(c)
8158 : /* No clobbers */);
8159 return result;
8162 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8163 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
8165 int16x4_t result;
8166 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8167 : "=w"(result)
8168 : "0"(a), "w"(b), "x"(c)
8169 : /* No clobbers */);
8170 return result;
8173 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8174 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
8176 int32x2_t result;
8177 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8178 : "=w"(result)
8179 : "0"(a), "w"(b), "w"(c)
8180 : /* No clobbers */);
8181 return result;
8184 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8185 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
8187 uint16x4_t result;
8188 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8189 : "=w"(result)
8190 : "0"(a), "w"(b), "x"(c)
8191 : /* No clobbers */);
8192 return result;
8195 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8196 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
8198 uint32x2_t result;
8199 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8200 : "=w"(result)
8201 : "0"(a), "w"(b), "w"(c)
8202 : /* No clobbers */);
8203 return result;
8206 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8207 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
8209 int8x8_t result;
8210 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8211 : "=w"(result)
8212 : "0"(a), "w"(b), "w"(c)
8213 : /* No clobbers */);
8214 return result;
8217 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8218 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
8220 int16x4_t result;
8221 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8222 : "=w"(result)
8223 : "0"(a), "w"(b), "w"(c)
8224 : /* No clobbers */);
8225 return result;
8228 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8229 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
8231 int32x2_t result;
8232 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8233 : "=w"(result)
8234 : "0"(a), "w"(b), "w"(c)
8235 : /* No clobbers */);
8236 return result;
8239 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8240 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
8242 uint8x8_t result;
8243 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8244 : "=w"(result)
8245 : "0"(a), "w"(b), "w"(c)
8246 : /* No clobbers */);
8247 return result;
8250 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8251 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
8253 uint16x4_t result;
8254 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8255 : "=w"(result)
8256 : "0"(a), "w"(b), "w"(c)
8257 : /* No clobbers */);
8258 return result;
8261 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8262 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
8264 uint32x2_t result;
8265 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8266 : "=w"(result)
8267 : "0"(a), "w"(b), "w"(c)
8268 : /* No clobbers */);
8269 return result;
/* vmlsl_high_*_lane: widening multiply-subtract of the HIGH halves of the
   narrow operands by a selected lane of c.  d must be a constant lane
   index ("i" constraint).  16-bit lane forms need "x" (v0-v15 only).  */
#define vmlsl_high_lane_s16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int16x8_t __c_ = (__c); \
       int16x8_t __b_ = (__b); \
       int32x4_t __a_ = (__a); \
       int32x4_t __result; \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_high_lane_s32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int32x4_t __c_ = (__c); \
       int32x4_t __b_ = (__b); \
       int64x2_t __a_ = (__a); \
       int64x2_t __result; \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_high_lane_u16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint16x8_t __c_ = (__c); \
       uint16x8_t __b_ = (__b); \
       uint32x4_t __a_ = (__a); \
       uint32x4_t __result; \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_high_lane_u32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint32x4_t __c_ = (__c); \
       uint32x4_t __b_ = (__b); \
       uint64x2_t __a_ = (__a); \
       uint64x2_t __result; \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
/* vmlsl_high_laneq_*: like vmlsl_high_lane_* but c is a 128-bit vector
   (laneq form); lane index d must be a compile-time constant.  */
#define vmlsl_high_laneq_s16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int16x8_t __c_ = (__c); \
       int16x8_t __b_ = (__b); \
       int32x4_t __a_ = (__a); \
       int32x4_t __result; \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_high_laneq_s32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int32x4_t __c_ = (__c); \
       int32x4_t __b_ = (__b); \
       int64x2_t __a_ = (__a); \
       int64x2_t __result; \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_high_laneq_u16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint16x8_t __c_ = (__c); \
       uint16x8_t __b_ = (__b); \
       uint32x4_t __a_ = (__a); \
       uint32x4_t __result; \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_high_laneq_u32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint32x4_t __c_ = (__c); \
       uint32x4_t __b_ = (__b); \
       uint64x2_t __a_ = (__a); \
       uint64x2_t __result; \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
8384 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8385 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
8387 int32x4_t result;
8388 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
8389 : "=w"(result)
8390 : "0"(a), "w"(b), "x"(c)
8391 : /* No clobbers */);
8392 return result;
8395 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8396 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
8398 int64x2_t result;
8399 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
8400 : "=w"(result)
8401 : "0"(a), "w"(b), "w"(c)
8402 : /* No clobbers */);
8403 return result;
8406 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8407 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
8409 uint32x4_t result;
8410 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
8411 : "=w"(result)
8412 : "0"(a), "w"(b), "x"(c)
8413 : /* No clobbers */);
8414 return result;
8417 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8418 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
8420 uint64x2_t result;
8421 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
8422 : "=w"(result)
8423 : "0"(a), "w"(b), "w"(c)
8424 : /* No clobbers */);
8425 return result;
8428 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8429 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
8431 int16x8_t result;
8432 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
8433 : "=w"(result)
8434 : "0"(a), "w"(b), "w"(c)
8435 : /* No clobbers */);
8436 return result;
8439 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8440 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
8442 int32x4_t result;
8443 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
8444 : "=w"(result)
8445 : "0"(a), "w"(b), "w"(c)
8446 : /* No clobbers */);
8447 return result;
8450 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8451 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
8453 int64x2_t result;
8454 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
8455 : "=w"(result)
8456 : "0"(a), "w"(b), "w"(c)
8457 : /* No clobbers */);
8458 return result;
8461 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8462 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
8464 uint16x8_t result;
8465 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
8466 : "=w"(result)
8467 : "0"(a), "w"(b), "w"(c)
8468 : /* No clobbers */);
8469 return result;
8472 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8473 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
8475 uint32x4_t result;
8476 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
8477 : "=w"(result)
8478 : "0"(a), "w"(b), "w"(c)
8479 : /* No clobbers */);
8480 return result;
8483 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8484 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
8486 uint64x2_t result;
8487 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
8488 : "=w"(result)
8489 : "0"(a), "w"(b), "w"(c)
8490 : /* No clobbers */);
8491 return result;
/* vmlsl_lane_*: widening multiply-subtract by a selected lane of the
   64-bit vector c; d must be a constant lane index.  */
#define vmlsl_lane_s16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int16x4_t __c_ = (__c); \
       int16x4_t __b_ = (__b); \
       int32x4_t __a_ = (__a); \
       int32x4_t __result; \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_lane_s32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int32x2_t __c_ = (__c); \
       int32x2_t __b_ = (__b); \
       int64x2_t __a_ = (__a); \
       int64x2_t __result; \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_lane_u16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint16x4_t __c_ = (__c); \
       uint16x4_t __b_ = (__b); \
       uint32x4_t __a_ = (__a); \
       uint32x4_t __result; \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_lane_u32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint32x2_t __c_ = (__c); \
       uint32x2_t __b_ = (__b); \
       uint64x2_t __a_ = (__a); \
       uint64x2_t __result; \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
/* vmlsl_laneq_*: widening multiply-subtract by a selected lane of the
   128-bit vector c; d must be a constant lane index.  */
#define vmlsl_laneq_s16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int16x8_t __c_ = (__c); \
       int16x4_t __b_ = (__b); \
       int32x4_t __a_ = (__a); \
       int32x4_t __result; \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_laneq_s32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int32x4_t __c_ = (__c); \
       int32x2_t __b_ = (__b); \
       int64x2_t __a_ = (__a); \
       int64x2_t __result; \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_laneq_u16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint16x8_t __c_ = (__c); \
       uint16x4_t __b_ = (__b); \
       uint32x4_t __a_ = (__a); \
       uint32x4_t __result; \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

#define vmlsl_laneq_u32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint32x4_t __c_ = (__c); \
       uint32x2_t __b_ = (__b); \
       uint64x2_t __a_ = (__a); \
       uint64x2_t __result; \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
8606 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8607 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
8609 int32x4_t result;
8610 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
8611 : "=w"(result)
8612 : "0"(a), "w"(b), "x"(c)
8613 : /* No clobbers */);
8614 return result;
8617 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8618 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
8620 int64x2_t result;
8621 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
8622 : "=w"(result)
8623 : "0"(a), "w"(b), "w"(c)
8624 : /* No clobbers */);
8625 return result;
8628 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8629 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
8631 uint32x4_t result;
8632 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
8633 : "=w"(result)
8634 : "0"(a), "w"(b), "x"(c)
8635 : /* No clobbers */);
8636 return result;
8639 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8640 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
8642 uint64x2_t result;
8643 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
8644 : "=w"(result)
8645 : "0"(a), "w"(b), "w"(c)
8646 : /* No clobbers */);
8647 return result;
8650 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8651 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
8653 int16x8_t result;
8654 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
8655 : "=w"(result)
8656 : "0"(a), "w"(b), "w"(c)
8657 : /* No clobbers */);
8658 return result;
8661 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8662 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
8664 int32x4_t result;
8665 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
8666 : "=w"(result)
8667 : "0"(a), "w"(b), "w"(c)
8668 : /* No clobbers */);
8669 return result;
8672 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8673 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
8675 int64x2_t result;
8676 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
8677 : "=w"(result)
8678 : "0"(a), "w"(b), "w"(c)
8679 : /* No clobbers */);
8680 return result;
8683 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8684 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
8686 uint16x8_t result;
8687 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
8688 : "=w"(result)
8689 : "0"(a), "w"(b), "w"(c)
8690 : /* No clobbers */);
8691 return result;
8694 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8695 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
8697 uint32x4_t result;
8698 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
8699 : "=w"(result)
8700 : "0"(a), "w"(b), "w"(c)
8701 : /* No clobbers */);
8702 return result;
8705 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8706 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
8708 uint64x2_t result;
8709 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
8710 : "=w"(result)
8711 : "0"(a), "w"(b), "w"(c)
8712 : /* No clobbers */);
8713 return result;
/* vmlsq_lane_f32: a - b * c[d] on 4 float lanes.  No fused FMLS-by-element
   is emitted here; the fall-back is an explicit fmul + fsub pair using a
   scratch vector (%1).  d must be a constant lane index.  */
#define vmlsq_lane_f32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       float32x4_t __c_ = (__c); \
       float32x4_t __b_ = (__b); \
       float32x4_t __a_ = (__a); \
       float32x4_t __result; \
       float32x4_t __t1; \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(__result), "=w"(__t1) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

/* vmlsq_lane_s16: a - b * c[d], 8 signed 16-bit lanes ("x": v0-v15).  */
#define vmlsq_lane_s16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int16x8_t __c_ = (__c); \
       int16x8_t __b_ = (__b); \
       int16x8_t __a_ = (__a); \
       int16x8_t __result; \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

/* vmlsq_lane_s32: a - b * c[d], 4 signed 32-bit lanes.  */
#define vmlsq_lane_s32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int32x4_t __c_ = (__c); \
       int32x4_t __b_ = (__b); \
       int32x4_t __a_ = (__a); \
       int32x4_t __result; \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

/* vmlsq_lane_u16: unsigned variant of vmlsq_lane_s16.  */
#define vmlsq_lane_u16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint16x8_t __c_ = (__c); \
       uint16x8_t __b_ = (__b); \
       uint16x8_t __a_ = (__a); \
       uint16x8_t __result; \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })

/* vmlsq_lane_u32: unsigned variant of vmlsq_lane_s32.  */
#define vmlsq_lane_u32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint32x4_t __c_ = (__c); \
       uint32x4_t __b_ = (__b); \
       uint32x4_t __a_ = (__a); \
       uint32x4_t __result; \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
/* vmlsq_laneq_f32: a - b * c[d] on 4 float lanes, c taken from a 128-bit
   vector.  Implemented as an explicit fmul into a scratch register
   followed by fsub; d must be a constant lane index.  */
#define vmlsq_laneq_f32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       float32x4_t __av = (__a); \
       float32x4_t __bv = (__b); \
       float32x4_t __cv = (__c); \
       float32x4_t __out; \
       float32x4_t __tmp; \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(__out), "=w"(__tmp) \
                : "0"(__av), "w"(__bv), "w"(__cv), "i"(__d) \
                : /* No clobbers */); \
       __out; \
     })

/* vmlsq_laneq_s16: a - b * c[d], 8 signed 16-bit lanes ("x": v0-v15).  */
#define vmlsq_laneq_s16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int16x8_t __av = (__a); \
       int16x8_t __bv = (__b); \
       int16x8_t __cv = (__c); \
       int16x8_t __out; \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(__out) \
                : "0"(__av), "w"(__bv), "x"(__cv), "i"(__d) \
                : /* No clobbers */); \
       __out; \
     })

/* vmlsq_laneq_s32: a - b * c[d], 4 signed 32-bit lanes.  */
#define vmlsq_laneq_s32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int32x4_t __av = (__a); \
       int32x4_t __bv = (__b); \
       int32x4_t __cv = (__c); \
       int32x4_t __out; \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(__out) \
                : "0"(__av), "w"(__bv), "w"(__cv), "i"(__d) \
                : /* No clobbers */); \
       __out; \
     })

/* vmlsq_laneq_u16: unsigned variant of vmlsq_laneq_s16.  */
#define vmlsq_laneq_u16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint16x8_t __av = (__a); \
       uint16x8_t __bv = (__b); \
       uint16x8_t __cv = (__c); \
       uint16x8_t __out; \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(__out) \
                : "0"(__av), "w"(__bv), "x"(__cv), "i"(__d) \
                : /* No clobbers */); \
       __out; \
     })

/* vmlsq_laneq_u32: unsigned variant of vmlsq_laneq_s32.  */
#define vmlsq_laneq_u32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint32x4_t __av = (__a); \
       uint32x4_t __bv = (__b); \
       uint32x4_t __cv = (__c); \
       uint32x4_t __out; \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(__out) \
                : "0"(__av), "w"(__bv), "w"(__cv), "i"(__d) \
                : /* No clobbers */); \
       __out; \
     })
8858 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8859 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
8861 float32x4_t result;
8862 float32x4_t t1;
8863 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
8864 : "=w"(result), "=w"(t1)
8865 : "0"(a), "w"(b), "w"(c)
8866 : /* No clobbers */);
8867 return result;
8870 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8871 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
8873 float64x2_t result;
8874 float64x2_t t1;
8875 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
8876 : "=w"(result), "=w"(t1)
8877 : "0"(a), "w"(b), "x"(c)
8878 : /* No clobbers */);
8879 return result;
8882 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8883 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
8885 int16x8_t result;
8886 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
8887 : "=w"(result)
8888 : "0"(a), "w"(b), "x"(c)
8889 : /* No clobbers */);
8890 return result;
8893 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8894 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
8896 int32x4_t result;
8897 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8898 : "=w"(result)
8899 : "0"(a), "w"(b), "w"(c)
8900 : /* No clobbers */);
8901 return result;
8904 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8905 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
8907 uint16x8_t result;
8908 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
8909 : "=w"(result)
8910 : "0"(a), "w"(b), "x"(c)
8911 : /* No clobbers */);
8912 return result;
8915 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8916 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
8918 uint32x4_t result;
8919 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8920 : "=w"(result)
8921 : "0"(a), "w"(b), "w"(c)
8922 : /* No clobbers */);
8923 return result;
8926 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8927 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8929 int8x16_t result;
8930 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8931 : "=w"(result)
8932 : "0"(a), "w"(b), "w"(c)
8933 : /* No clobbers */);
8934 return result;
8937 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8938 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8940 int16x8_t result;
8941 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8942 : "=w"(result)
8943 : "0"(a), "w"(b), "w"(c)
8944 : /* No clobbers */);
8945 return result;
8948 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8949 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8951 int32x4_t result;
8952 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8953 : "=w"(result)
8954 : "0"(a), "w"(b), "w"(c)
8955 : /* No clobbers */);
8956 return result;
8959 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8960 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8962 uint8x16_t result;
8963 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8964 : "=w"(result)
8965 : "0"(a), "w"(b), "w"(c)
8966 : /* No clobbers */);
8967 return result;
8970 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8971 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8973 uint16x8_t result;
8974 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8975 : "=w"(result)
8976 : "0"(a), "w"(b), "w"(c)
8977 : /* No clobbers */);
8978 return result;
8981 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8982 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8984 uint32x4_t result;
8985 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8986 : "=w"(result)
8987 : "0"(a), "w"(b), "w"(c)
8988 : /* No clobbers */);
8989 return result;
8992 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8993 vmov_n_f32 (float32_t a)
8995 float32x2_t result;
8996 __asm__ ("dup %0.2s, %w1"
8997 : "=w"(result)
8998 : "r"(a)
8999 : /* No clobbers */);
9000 return result;
9003 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9004 vmov_n_p8 (uint32_t a)
9006 poly8x8_t result;
9007 __asm__ ("dup %0.8b,%w1"
9008 : "=w"(result)
9009 : "r"(a)
9010 : /* No clobbers */);
9011 return result;
9014 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
9015 vmov_n_p16 (uint32_t a)
9017 poly16x4_t result;
9018 __asm__ ("dup %0.4h,%w1"
9019 : "=w"(result)
9020 : "r"(a)
9021 : /* No clobbers */);
9022 return result;
9025 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9026 vmov_n_s8 (int32_t a)
9028 int8x8_t result;
9029 __asm__ ("dup %0.8b,%w1"
9030 : "=w"(result)
9031 : "r"(a)
9032 : /* No clobbers */);
9033 return result;
9036 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9037 vmov_n_s16 (int32_t a)
9039 int16x4_t result;
9040 __asm__ ("dup %0.4h,%w1"
9041 : "=w"(result)
9042 : "r"(a)
9043 : /* No clobbers */);
9044 return result;
9047 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9048 vmov_n_s32 (int32_t a)
9050 int32x2_t result;
9051 __asm__ ("dup %0.2s,%w1"
9052 : "=w"(result)
9053 : "r"(a)
9054 : /* No clobbers */);
9055 return result;
9058 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9059 vmov_n_s64 (int64_t a)
9061 int64x1_t result;
9062 __asm__ ("ins %0.d[0],%x1"
9063 : "=w"(result)
9064 : "r"(a)
9065 : /* No clobbers */);
9066 return result;
9069 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9070 vmov_n_u8 (uint32_t a)
9072 uint8x8_t result;
9073 __asm__ ("dup %0.8b,%w1"
9074 : "=w"(result)
9075 : "r"(a)
9076 : /* No clobbers */);
9077 return result;
9080 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9081 vmov_n_u16 (uint32_t a)
9083 uint16x4_t result;
9084 __asm__ ("dup %0.4h,%w1"
9085 : "=w"(result)
9086 : "r"(a)
9087 : /* No clobbers */);
9088 return result;
9091 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9092 vmov_n_u32 (uint32_t a)
9094 uint32x2_t result;
9095 __asm__ ("dup %0.2s,%w1"
9096 : "=w"(result)
9097 : "r"(a)
9098 : /* No clobbers */);
9099 return result;
9102 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9103 vmov_n_u64 (uint64_t a)
9105 uint64x1_t result;
9106 __asm__ ("ins %0.d[0],%x1"
9107 : "=w"(result)
9108 : "r"(a)
9109 : /* No clobbers */);
9110 return result;
9113 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9114 vmovl_high_s8 (int8x16_t a)
9116 int16x8_t result;
9117 __asm__ ("sshll2 %0.8h,%1.16b,#0"
9118 : "=w"(result)
9119 : "w"(a)
9120 : /* No clobbers */);
9121 return result;
9124 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9125 vmovl_high_s16 (int16x8_t a)
9127 int32x4_t result;
9128 __asm__ ("sshll2 %0.4s,%1.8h,#0"
9129 : "=w"(result)
9130 : "w"(a)
9131 : /* No clobbers */);
9132 return result;
9135 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9136 vmovl_high_s32 (int32x4_t a)
9138 int64x2_t result;
9139 __asm__ ("sshll2 %0.2d,%1.4s,#0"
9140 : "=w"(result)
9141 : "w"(a)
9142 : /* No clobbers */);
9143 return result;
9146 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9147 vmovl_high_u8 (uint8x16_t a)
9149 uint16x8_t result;
9150 __asm__ ("ushll2 %0.8h,%1.16b,#0"
9151 : "=w"(result)
9152 : "w"(a)
9153 : /* No clobbers */);
9154 return result;
9157 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9158 vmovl_high_u16 (uint16x8_t a)
9160 uint32x4_t result;
9161 __asm__ ("ushll2 %0.4s,%1.8h,#0"
9162 : "=w"(result)
9163 : "w"(a)
9164 : /* No clobbers */);
9165 return result;
9168 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9169 vmovl_high_u32 (uint32x4_t a)
9171 uint64x2_t result;
9172 __asm__ ("ushll2 %0.2d,%1.4s,#0"
9173 : "=w"(result)
9174 : "w"(a)
9175 : /* No clobbers */);
9176 return result;
9179 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9180 vmovl_s8 (int8x8_t a)
9182 int16x8_t result;
9183 __asm__ ("sshll %0.8h,%1.8b,#0"
9184 : "=w"(result)
9185 : "w"(a)
9186 : /* No clobbers */);
9187 return result;
9190 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9191 vmovl_s16 (int16x4_t a)
9193 int32x4_t result;
9194 __asm__ ("sshll %0.4s,%1.4h,#0"
9195 : "=w"(result)
9196 : "w"(a)
9197 : /* No clobbers */);
9198 return result;
9201 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9202 vmovl_s32 (int32x2_t a)
9204 int64x2_t result;
9205 __asm__ ("sshll %0.2d,%1.2s,#0"
9206 : "=w"(result)
9207 : "w"(a)
9208 : /* No clobbers */);
9209 return result;
9212 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9213 vmovl_u8 (uint8x8_t a)
9215 uint16x8_t result;
9216 __asm__ ("ushll %0.8h,%1.8b,#0"
9217 : "=w"(result)
9218 : "w"(a)
9219 : /* No clobbers */);
9220 return result;
9223 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9224 vmovl_u16 (uint16x4_t a)
9226 uint32x4_t result;
9227 __asm__ ("ushll %0.4s,%1.4h,#0"
9228 : "=w"(result)
9229 : "w"(a)
9230 : /* No clobbers */);
9231 return result;
9234 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9235 vmovl_u32 (uint32x2_t a)
9237 uint64x2_t result;
9238 __asm__ ("ushll %0.2d,%1.2s,#0"
9239 : "=w"(result)
9240 : "w"(a)
9241 : /* No clobbers */);
9242 return result;
9245 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9246 vmovn_high_s16 (int8x8_t a, int16x8_t b)
9248 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
9249 __asm__ ("xtn2 %0.16b,%1.8h"
9250 : "+w"(result)
9251 : "w"(b)
9252 : /* No clobbers */);
9253 return result;
9256 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9257 vmovn_high_s32 (int16x4_t a, int32x4_t b)
9259 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
9260 __asm__ ("xtn2 %0.8h,%1.4s"
9261 : "+w"(result)
9262 : "w"(b)
9263 : /* No clobbers */);
9264 return result;
9267 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9268 vmovn_high_s64 (int32x2_t a, int64x2_t b)
9270 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
9271 __asm__ ("xtn2 %0.4s,%1.2d"
9272 : "+w"(result)
9273 : "w"(b)
9274 : /* No clobbers */);
9275 return result;
9278 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9279 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
9281 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
9282 __asm__ ("xtn2 %0.16b,%1.8h"
9283 : "+w"(result)
9284 : "w"(b)
9285 : /* No clobbers */);
9286 return result;
9289 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9290 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
9292 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
9293 __asm__ ("xtn2 %0.8h,%1.4s"
9294 : "+w"(result)
9295 : "w"(b)
9296 : /* No clobbers */);
9297 return result;
9300 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9301 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
9303 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
9304 __asm__ ("xtn2 %0.4s,%1.2d"
9305 : "+w"(result)
9306 : "w"(b)
9307 : /* No clobbers */);
9308 return result;
9311 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9312 vmovn_s16 (int16x8_t a)
9314 int8x8_t result;
9315 __asm__ ("xtn %0.8b,%1.8h"
9316 : "=w"(result)
9317 : "w"(a)
9318 : /* No clobbers */);
9319 return result;
9322 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9323 vmovn_s32 (int32x4_t a)
9325 int16x4_t result;
9326 __asm__ ("xtn %0.4h,%1.4s"
9327 : "=w"(result)
9328 : "w"(a)
9329 : /* No clobbers */);
9330 return result;
9333 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9334 vmovn_s64 (int64x2_t a)
9336 int32x2_t result;
9337 __asm__ ("xtn %0.2s,%1.2d"
9338 : "=w"(result)
9339 : "w"(a)
9340 : /* No clobbers */);
9341 return result;
9344 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9345 vmovn_u16 (uint16x8_t a)
9347 uint8x8_t result;
9348 __asm__ ("xtn %0.8b,%1.8h"
9349 : "=w"(result)
9350 : "w"(a)
9351 : /* No clobbers */);
9352 return result;
9355 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9356 vmovn_u32 (uint32x4_t a)
9358 uint16x4_t result;
9359 __asm__ ("xtn %0.4h,%1.4s"
9360 : "=w"(result)
9361 : "w"(a)
9362 : /* No clobbers */);
9363 return result;
9366 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9367 vmovn_u64 (uint64x2_t a)
9369 uint32x2_t result;
9370 __asm__ ("xtn %0.2s,%1.2d"
9371 : "=w"(result)
9372 : "w"(a)
9373 : /* No clobbers */);
9374 return result;
9377 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9378 vmovq_n_f32 (float32_t a)
9380 float32x4_t result;
9381 __asm__ ("dup %0.4s, %w1"
9382 : "=w"(result)
9383 : "r"(a)
9384 : /* No clobbers */);
9385 return result;
9388 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9389 vmovq_n_f64 (float64_t a)
9391 return (float64x2_t) {a, a};
9394 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9395 vmovq_n_p8 (uint32_t a)
9397 poly8x16_t result;
9398 __asm__ ("dup %0.16b,%w1"
9399 : "=w"(result)
9400 : "r"(a)
9401 : /* No clobbers */);
9402 return result;
9405 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9406 vmovq_n_p16 (uint32_t a)
9408 poly16x8_t result;
9409 __asm__ ("dup %0.8h,%w1"
9410 : "=w"(result)
9411 : "r"(a)
9412 : /* No clobbers */);
9413 return result;
9416 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9417 vmovq_n_s8 (int32_t a)
9419 int8x16_t result;
9420 __asm__ ("dup %0.16b,%w1"
9421 : "=w"(result)
9422 : "r"(a)
9423 : /* No clobbers */);
9424 return result;
9427 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9428 vmovq_n_s16 (int32_t a)
9430 int16x8_t result;
9431 __asm__ ("dup %0.8h,%w1"
9432 : "=w"(result)
9433 : "r"(a)
9434 : /* No clobbers */);
9435 return result;
9438 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9439 vmovq_n_s32 (int32_t a)
9441 int32x4_t result;
9442 __asm__ ("dup %0.4s,%w1"
9443 : "=w"(result)
9444 : "r"(a)
9445 : /* No clobbers */);
9446 return result;
9449 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9450 vmovq_n_s64 (int64_t a)
9452 int64x2_t result;
9453 __asm__ ("dup %0.2d,%x1"
9454 : "=w"(result)
9455 : "r"(a)
9456 : /* No clobbers */);
9457 return result;
9460 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9461 vmovq_n_u8 (uint32_t a)
9463 uint8x16_t result;
9464 __asm__ ("dup %0.16b,%w1"
9465 : "=w"(result)
9466 : "r"(a)
9467 : /* No clobbers */);
9468 return result;
9471 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9472 vmovq_n_u16 (uint32_t a)
9474 uint16x8_t result;
9475 __asm__ ("dup %0.8h,%w1"
9476 : "=w"(result)
9477 : "r"(a)
9478 : /* No clobbers */);
9479 return result;
9482 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9483 vmovq_n_u32 (uint32_t a)
9485 uint32x4_t result;
9486 __asm__ ("dup %0.4s,%w1"
9487 : "=w"(result)
9488 : "r"(a)
9489 : /* No clobbers */);
9490 return result;
9493 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9494 vmovq_n_u64 (uint64_t a)
9496 uint64x2_t result;
9497 __asm__ ("dup %0.2d,%x1"
9498 : "=w"(result)
9499 : "r"(a)
9500 : /* No clobbers */);
9501 return result;
/* vmul_n_<t>: multiply each lane of A by scalar B.  B is placed in a SIMD
   register and referenced as lane 0 ("%2.<t>[0]").  For the 16-bit element
   forms the "x" constraint restricts B to V0-V15, which is what the
   by-element MUL encoding requires for H-sized lanes.  */
9504 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9505 vmul_n_f32 (float32x2_t a, float32_t b)
9507 float32x2_t result;
9508 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
9509 : "=w"(result)
9510 : "w"(a), "w"(b)
9511 : /* No clobbers */);
9512 return result;
9515 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9516 vmul_n_s16 (int16x4_t a, int16_t b)
9518 int16x4_t result;
9519 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
9520 : "=w"(result)
9521 : "w"(a), "x"(b)
9522 : /* No clobbers */);
9523 return result;
9526 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9527 vmul_n_s32 (int32x2_t a, int32_t b)
9529 int32x2_t result;
9530 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
9531 : "=w"(result)
9532 : "w"(a), "w"(b)
9533 : /* No clobbers */);
9534 return result;
9537 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9538 vmul_n_u16 (uint16x4_t a, uint16_t b)
9540 uint16x4_t result;
9541 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
9542 : "=w"(result)
9543 : "w"(a), "x"(b)
9544 : /* No clobbers */);
9545 return result;
9548 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9549 vmul_n_u32 (uint32x2_t a, uint32_t b)
9551 uint32x2_t result;
9552 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
9553 : "=w"(result)
9554 : "w"(a), "w"(b)
9555 : /* No clobbers */);
9556 return result;
/* vmuld_lane_f64: scalar double A times lane C of B (FMUL by element).
   A macro (statement expression) because C must be a compile-time
   immediate for the "i" constraint and the lane index in the mnemonic.  */
9559 #define vmuld_lane_f64(a, b, c) \
9560 __extension__ \
9561 ({ \
9562 float64x2_t b_ = (b); \
9563 float64_t a_ = (a); \
9564 float64_t result; \
9565 __asm__ ("fmul %d0,%d1,%2.d[%3]" \
9566 : "=w"(result) \
9567 : "w"(a_), "w"(b_), "i"(c) \
9568 : /* No clobbers */); \
9569 result; \
/* vmull_high_lane_s16: signed widening multiply of the high half of A
   (SMULL2) by lane C of B.

   Fix: ACLE defines the lane operand of the non-"laneq" variant as a
   64-bit vector (int16x4_t); the previous definition wrongly declared it
   int16x8_t, which both accepted the wrong argument type and allowed lane
   indices 4..7 that belong to vmull_high_laneq_s16.  The "x" constraint
   keeps B in V0-V15 as required by the H-element multiply encoding.  */
#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_lane_s32: signed widening multiply of the high half of A
   (SMULL2) by lane C of B.

   Fix: ACLE defines the lane operand of the non-"laneq" variant as a
   64-bit vector (int32x2_t); the previous definition wrongly declared it
   int32x4_t (that is the vmull_high_laneq_s32 signature).  */
#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_lane_u16: unsigned widening multiply of the high half of A
   (UMULL2) by lane C of B.

   Fix: ACLE defines the lane operand of the non-"laneq" variant as a
   64-bit vector (uint16x4_t); the previous definition wrongly declared it
   uint16x8_t (that is the vmull_high_laneq_u16 signature).  The "x"
   constraint keeps B in V0-V15 for the H-element encoding.  */
#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_lane_u32: unsigned widening multiply of the high half of A
   (UMULL2) by lane C of B.

   Fix: ACLE defines the lane operand of the non-"laneq" variant as a
   64-bit vector (uint32x2_t); the previous definition wrongly declared it
   uint32x4_t (that is the vmull_high_laneq_u32 signature).  */
#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_laneq_<t>: widening multiply of the high half of A by lane C
   of the 128-bit ("q") vector B.  C must be a compile-time immediate
   ("i" constraint); "x" restricts 16-bit-element B to V0-V15.  */
9624 #define vmull_high_laneq_s16(a, b, c) \
9625 __extension__ \
9626 ({ \
9627 int16x8_t b_ = (b); \
9628 int16x8_t a_ = (a); \
9629 int32x4_t result; \
9630 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
9631 : "=w"(result) \
9632 : "w"(a_), "x"(b_), "i"(c) \
9633 : /* No clobbers */); \
9634 result; \
9637 #define vmull_high_laneq_s32(a, b, c) \
9638 __extension__ \
9639 ({ \
9640 int32x4_t b_ = (b); \
9641 int32x4_t a_ = (a); \
9642 int64x2_t result; \
9643 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
9644 : "=w"(result) \
9645 : "w"(a_), "w"(b_), "i"(c) \
9646 : /* No clobbers */); \
9647 result; \
9650 #define vmull_high_laneq_u16(a, b, c) \
9651 __extension__ \
9652 ({ \
9653 uint16x8_t b_ = (b); \
9654 uint16x8_t a_ = (a); \
9655 uint32x4_t result; \
9656 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
9657 : "=w"(result) \
9658 : "w"(a_), "x"(b_), "i"(c) \
9659 : /* No clobbers */); \
9660 result; \
9663 #define vmull_high_laneq_u32(a, b, c) \
9664 __extension__ \
9665 ({ \
9666 uint32x4_t b_ = (b); \
9667 uint32x4_t a_ = (a); \
9668 uint64x2_t result; \
9669 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
9670 : "=w"(result) \
9671 : "w"(a_), "w"(b_), "i"(c) \
9672 : /* No clobbers */); \
9673 result; \
/* vmull_high_n_<t>: widening multiply of the high half of A by scalar B,
   which is placed in a SIMD register and read as lane 0.  */
9676 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9677 vmull_high_n_s16 (int16x8_t a, int16_t b)
9679 int32x4_t result;
9680 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
9681 : "=w"(result)
9682 : "w"(a), "x"(b)
9683 : /* No clobbers */);
9684 return result;
9687 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9688 vmull_high_n_s32 (int32x4_t a, int32_t b)
9690 int64x2_t result;
9691 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
9692 : "=w"(result)
9693 : "w"(a), "w"(b)
9694 : /* No clobbers */);
9695 return result;
9698 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9699 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
9701 uint32x4_t result;
9702 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
9703 : "=w"(result)
9704 : "w"(a), "x"(b)
9705 : /* No clobbers */);
9706 return result;
9709 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9710 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
9712 uint64x2_t result;
9713 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
9714 : "=w"(result)
9715 : "w"(a), "w"(b)
9716 : /* No clobbers */);
9717 return result;
/* vmull_high_<t>: vector-by-vector widening multiply of the HIGH halves
   of two 128-bit vectors (SMULL2/UMULL2/PMULL2).  The p8 variant is a
   polynomial (carry-less) multiply.  */
9720 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9721 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
9723 poly16x8_t result;
9724 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
9725 : "=w"(result)
9726 : "w"(a), "w"(b)
9727 : /* No clobbers */);
9728 return result;
9731 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9732 vmull_high_s8 (int8x16_t a, int8x16_t b)
9734 int16x8_t result;
9735 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
9736 : "=w"(result)
9737 : "w"(a), "w"(b)
9738 : /* No clobbers */);
9739 return result;
9742 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9743 vmull_high_s16 (int16x8_t a, int16x8_t b)
9745 int32x4_t result;
9746 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
9747 : "=w"(result)
9748 : "w"(a), "w"(b)
9749 : /* No clobbers */);
9750 return result;
9753 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9754 vmull_high_s32 (int32x4_t a, int32x4_t b)
9756 int64x2_t result;
9757 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
9758 : "=w"(result)
9759 : "w"(a), "w"(b)
9760 : /* No clobbers */);
9761 return result;
9764 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9765 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
9767 uint16x8_t result;
9768 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
9769 : "=w"(result)
9770 : "w"(a), "w"(b)
9771 : /* No clobbers */);
9772 return result;
9775 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9776 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
9778 uint32x4_t result;
9779 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
9780 : "=w"(result)
9781 : "w"(a), "w"(b)
9782 : /* No clobbers */);
9783 return result;
9786 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9787 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
9789 uint64x2_t result;
9790 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
9791 : "=w"(result)
9792 : "w"(a), "w"(b)
9793 : /* No clobbers */);
9794 return result;
/* vmull_lane_<t>: widening multiply of 64-bit vector A by lane C of the
   64-bit vector B.  C must be a compile-time immediate; "x" keeps
   16-bit-element B in V0-V15 as the by-element encoding requires.  */
9797 #define vmull_lane_s16(a, b, c) \
9798 __extension__ \
9799 ({ \
9800 int16x4_t b_ = (b); \
9801 int16x4_t a_ = (a); \
9802 int32x4_t result; \
9803 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
9804 : "=w"(result) \
9805 : "w"(a_), "x"(b_), "i"(c) \
9806 : /* No clobbers */); \
9807 result; \
9810 #define vmull_lane_s32(a, b, c) \
9811 __extension__ \
9812 ({ \
9813 int32x2_t b_ = (b); \
9814 int32x2_t a_ = (a); \
9815 int64x2_t result; \
9816 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
9817 : "=w"(result) \
9818 : "w"(a_), "w"(b_), "i"(c) \
9819 : /* No clobbers */); \
9820 result; \
9823 #define vmull_lane_u16(a, b, c) \
9824 __extension__ \
9825 ({ \
9826 uint16x4_t b_ = (b); \
9827 uint16x4_t a_ = (a); \
9828 uint32x4_t result; \
9829 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
9830 : "=w"(result) \
9831 : "w"(a_), "x"(b_), "i"(c) \
9832 : /* No clobbers */); \
9833 result; \
9836 #define vmull_lane_u32(a, b, c) \
9837 __extension__ \
9838 ({ \
9839 uint32x2_t b_ = (b); \
9840 uint32x2_t a_ = (a); \
9841 uint64x2_t result; \
9842 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
9843 : "=w"(result) \
9844 : "w"(a_), "w"(b_), "i"(c) \
9845 : /* No clobbers */); \
9846 result; \
/* vmull_laneq_<t>: widening multiply of 64-bit vector A by lane C of the
   128-bit ("q") vector B; lane range is therefore twice that of the
   corresponding vmull_lane_<t> macro.  */
9849 #define vmull_laneq_s16(a, b, c) \
9850 __extension__ \
9851 ({ \
9852 int16x8_t b_ = (b); \
9853 int16x4_t a_ = (a); \
9854 int32x4_t result; \
9855 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
9856 : "=w"(result) \
9857 : "w"(a_), "x"(b_), "i"(c) \
9858 : /* No clobbers */); \
9859 result; \
9862 #define vmull_laneq_s32(a, b, c) \
9863 __extension__ \
9864 ({ \
9865 int32x4_t b_ = (b); \
9866 int32x2_t a_ = (a); \
9867 int64x2_t result; \
9868 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
9869 : "=w"(result) \
9870 : "w"(a_), "w"(b_), "i"(c) \
9871 : /* No clobbers */); \
9872 result; \
9875 #define vmull_laneq_u16(a, b, c) \
9876 __extension__ \
9877 ({ \
9878 uint16x8_t b_ = (b); \
9879 uint16x4_t a_ = (a); \
9880 uint32x4_t result; \
9881 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
9882 : "=w"(result) \
9883 : "w"(a_), "x"(b_), "i"(c) \
9884 : /* No clobbers */); \
9885 result; \
9888 #define vmull_laneq_u32(a, b, c) \
9889 __extension__ \
9890 ({ \
9891 uint32x4_t b_ = (b); \
9892 uint32x2_t a_ = (a); \
9893 uint64x2_t result; \
9894 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
9895 : "=w"(result) \
9896 : "w"(a_), "w"(b_), "i"(c) \
9897 : /* No clobbers */); \
9898 result; \
/* vmull_n_<t>: widening multiply of 64-bit vector A by scalar B,
   referenced as lane 0 of a SIMD register.  */
9901 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9902 vmull_n_s16 (int16x4_t a, int16_t b)
9904 int32x4_t result;
9905 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
9906 : "=w"(result)
9907 : "w"(a), "x"(b)
9908 : /* No clobbers */);
9909 return result;
9912 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9913 vmull_n_s32 (int32x2_t a, int32_t b)
9915 int64x2_t result;
9916 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
9917 : "=w"(result)
9918 : "w"(a), "w"(b)
9919 : /* No clobbers */);
9920 return result;
9923 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9924 vmull_n_u16 (uint16x4_t a, uint16_t b)
9926 uint32x4_t result;
9927 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
9928 : "=w"(result)
9929 : "w"(a), "x"(b)
9930 : /* No clobbers */);
9931 return result;
9934 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9935 vmull_n_u32 (uint32x2_t a, uint32_t b)
9937 uint64x2_t result;
9938 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
9939 : "=w"(result)
9940 : "w"(a), "w"(b)
9941 : /* No clobbers */);
9942 return result;
/* vmull_<t>: vector-by-vector widening multiply of two 64-bit vectors;
   result elements are twice the input width.  p8 is the polynomial
   (carry-less) form.  */
9945 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9946 vmull_p8 (poly8x8_t a, poly8x8_t b)
9948 poly16x8_t result;
9949 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
9950 : "=w"(result)
9951 : "w"(a), "w"(b)
9952 : /* No clobbers */);
9953 return result;
9956 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9957 vmull_s8 (int8x8_t a, int8x8_t b)
9959 int16x8_t result;
9960 __asm__ ("smull %0.8h, %1.8b, %2.8b"
9961 : "=w"(result)
9962 : "w"(a), "w"(b)
9963 : /* No clobbers */);
9964 return result;
9967 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9968 vmull_s16 (int16x4_t a, int16x4_t b)
9970 int32x4_t result;
9971 __asm__ ("smull %0.4s, %1.4h, %2.4h"
9972 : "=w"(result)
9973 : "w"(a), "w"(b)
9974 : /* No clobbers */);
9975 return result;
9978 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9979 vmull_s32 (int32x2_t a, int32x2_t b)
9981 int64x2_t result;
9982 __asm__ ("smull %0.2d, %1.2s, %2.2s"
9983 : "=w"(result)
9984 : "w"(a), "w"(b)
9985 : /* No clobbers */);
9986 return result;
9989 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9990 vmull_u8 (uint8x8_t a, uint8x8_t b)
9992 uint16x8_t result;
9993 __asm__ ("umull %0.8h, %1.8b, %2.8b"
9994 : "=w"(result)
9995 : "w"(a), "w"(b)
9996 : /* No clobbers */);
9997 return result;
10000 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10001 vmull_u16 (uint16x4_t a, uint16x4_t b)
10003 uint32x4_t result;
10004 __asm__ ("umull %0.4s, %1.4h, %2.4h"
10005 : "=w"(result)
10006 : "w"(a), "w"(b)
10007 : /* No clobbers */);
10008 return result;
10011 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10012 vmull_u32 (uint32x2_t a, uint32x2_t b)
10014 uint64x2_t result;
10015 __asm__ ("umull %0.2d, %1.2s, %2.2s"
10016 : "=w"(result)
10017 : "w"(a), "w"(b)
10018 : /* No clobbers */);
10019 return result;
/* vmulq_n_<t>: 128-bit variant of vmul_n — multiply each lane of A by
   scalar B, read as lane 0 of a SIMD register ("x" = V0-V15 for 16-bit
   element forms).  */
10022 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10023 vmulq_n_f32 (float32x4_t a, float32_t b)
10025 float32x4_t result;
10026 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
10027 : "=w"(result)
10028 : "w"(a), "w"(b)
10029 : /* No clobbers */);
10030 return result;
10033 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10034 vmulq_n_f64 (float64x2_t a, float64_t b)
10036 float64x2_t result;
10037 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
10038 : "=w"(result)
10039 : "w"(a), "w"(b)
10040 : /* No clobbers */);
10041 return result;
10044 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10045 vmulq_n_s16 (int16x8_t a, int16_t b)
10047 int16x8_t result;
10048 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10049 : "=w"(result)
10050 : "w"(a), "x"(b)
10051 : /* No clobbers */);
10052 return result;
10055 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10056 vmulq_n_s32 (int32x4_t a, int32_t b)
10058 int32x4_t result;
10059 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10060 : "=w"(result)
10061 : "w"(a), "w"(b)
10062 : /* No clobbers */);
10063 return result;
10066 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10067 vmulq_n_u16 (uint16x8_t a, uint16_t b)
10069 uint16x8_t result;
10070 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10071 : "=w"(result)
10072 : "w"(a), "x"(b)
10073 : /* No clobbers */);
10074 return result;
10077 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10078 vmulq_n_u32 (uint32x4_t a, uint32_t b)
10080 uint32x4_t result;
10081 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10082 : "=w"(result)
10083 : "w"(a), "w"(b)
10084 : /* No clobbers */);
10085 return result;
/* vmuls_lane_f32: scalar float A times lane C of the 128-bit vector B
   (FMUL by element, S-sized).  C must be a compile-time immediate.  */
10088 #define vmuls_lane_f32(a, b, c) \
10089 __extension__ \
10090 ({ \
10091 float32x4_t b_ = (b); \
10092 float32_t a_ = (a); \
10093 float32_t result; \
10094 __asm__ ("fmul %s0,%s1,%2.s[%3]" \
10095 : "=w"(result) \
10096 : "w"(a_), "w"(b_), "i"(c) \
10097 : /* No clobbers */); \
10098 result; \
/* vmulx family: FMULX — like FMUL except that (0 * infinity) returns
   +/-2.0 instead of NaN (used by reciprocal-step sequences).  Lane
   variants are macros because the lane index must be an immediate.  */
10101 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10102 vmulx_f32 (float32x2_t a, float32x2_t b)
10104 float32x2_t result;
10105 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
10106 : "=w"(result)
10107 : "w"(a), "w"(b)
10108 : /* No clobbers */);
10109 return result;
10112 #define vmulx_lane_f32(a, b, c) \
10113 __extension__ \
10114 ({ \
10115 float32x4_t b_ = (b); \
10116 float32x2_t a_ = (a); \
10117 float32x2_t result; \
10118 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
10119 : "=w"(result) \
10120 : "w"(a_), "w"(b_), "i"(c) \
10121 : /* No clobbers */); \
10122 result; \
10125 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10126 vmulxd_f64 (float64_t a, float64_t b)
10128 float64_t result;
10129 __asm__ ("fmulx %d0, %d1, %d2"
10130 : "=w"(result)
10131 : "w"(a), "w"(b)
10132 : /* No clobbers */);
10133 return result;
10136 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10137 vmulxq_f32 (float32x4_t a, float32x4_t b)
10139 float32x4_t result;
10140 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
10141 : "=w"(result)
10142 : "w"(a), "w"(b)
10143 : /* No clobbers */);
10144 return result;
10147 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10148 vmulxq_f64 (float64x2_t a, float64x2_t b)
10150 float64x2_t result;
10151 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
10152 : "=w"(result)
10153 : "w"(a), "w"(b)
10154 : /* No clobbers */);
10155 return result;
10158 #define vmulxq_lane_f32(a, b, c) \
10159 __extension__ \
10160 ({ \
10161 float32x4_t b_ = (b); \
10162 float32x4_t a_ = (a); \
10163 float32x4_t result; \
10164 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
10165 : "=w"(result) \
10166 : "w"(a_), "w"(b_), "i"(c) \
10167 : /* No clobbers */); \
10168 result; \
10171 #define vmulxq_lane_f64(a, b, c) \
10172 __extension__ \
10173 ({ \
10174 float64x2_t b_ = (b); \
10175 float64x2_t a_ = (a); \
10176 float64x2_t result; \
10177 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
10178 : "=w"(result) \
10179 : "w"(a_), "w"(b_), "i"(c) \
10180 : /* No clobbers */); \
10181 result; \
10184 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10185 vmulxs_f32 (float32_t a, float32_t b)
10187 float32_t result;
10188 __asm__ ("fmulx %s0, %s1, %s2"
10189 : "=w"(result)
10190 : "w"(a), "w"(b)
10191 : /* No clobbers */);
10192 return result;
/* vmvn[q]_<t>: bitwise NOT of every element (MVN).  Element type is
   irrelevant to the operation, so all variants use the .8b/.16b form.  */
10195 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10196 vmvn_p8 (poly8x8_t a)
10198 poly8x8_t result;
10199 __asm__ ("mvn %0.8b,%1.8b"
10200 : "=w"(result)
10201 : "w"(a)
10202 : /* No clobbers */);
10203 return result;
10206 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10207 vmvn_s8 (int8x8_t a)
10209 int8x8_t result;
10210 __asm__ ("mvn %0.8b,%1.8b"
10211 : "=w"(result)
10212 : "w"(a)
10213 : /* No clobbers */);
10214 return result;
10217 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10218 vmvn_s16 (int16x4_t a)
10220 int16x4_t result;
10221 __asm__ ("mvn %0.8b,%1.8b"
10222 : "=w"(result)
10223 : "w"(a)
10224 : /* No clobbers */);
10225 return result;
10228 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10229 vmvn_s32 (int32x2_t a)
10231 int32x2_t result;
10232 __asm__ ("mvn %0.8b,%1.8b"
10233 : "=w"(result)
10234 : "w"(a)
10235 : /* No clobbers */);
10236 return result;
10239 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10240 vmvn_u8 (uint8x8_t a)
10242 uint8x8_t result;
10243 __asm__ ("mvn %0.8b,%1.8b"
10244 : "=w"(result)
10245 : "w"(a)
10246 : /* No clobbers */);
10247 return result;
10250 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10251 vmvn_u16 (uint16x4_t a)
10253 uint16x4_t result;
10254 __asm__ ("mvn %0.8b,%1.8b"
10255 : "=w"(result)
10256 : "w"(a)
10257 : /* No clobbers */);
10258 return result;
10261 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10262 vmvn_u32 (uint32x2_t a)
10264 uint32x2_t result;
10265 __asm__ ("mvn %0.8b,%1.8b"
10266 : "=w"(result)
10267 : "w"(a)
10268 : /* No clobbers */);
10269 return result;
10272 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10273 vmvnq_p8 (poly8x16_t a)
10275 poly8x16_t result;
10276 __asm__ ("mvn %0.16b,%1.16b"
10277 : "=w"(result)
10278 : "w"(a)
10279 : /* No clobbers */);
10280 return result;
10283 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10284 vmvnq_s8 (int8x16_t a)
10286 int8x16_t result;
10287 __asm__ ("mvn %0.16b,%1.16b"
10288 : "=w"(result)
10289 : "w"(a)
10290 : /* No clobbers */);
10291 return result;
10294 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10295 vmvnq_s16 (int16x8_t a)
10297 int16x8_t result;
10298 __asm__ ("mvn %0.16b,%1.16b"
10299 : "=w"(result)
10300 : "w"(a)
10301 : /* No clobbers */);
10302 return result;
10305 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10306 vmvnq_s32 (int32x4_t a)
10308 int32x4_t result;
10309 __asm__ ("mvn %0.16b,%1.16b"
10310 : "=w"(result)
10311 : "w"(a)
10312 : /* No clobbers */);
10313 return result;
10316 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10317 vmvnq_u8 (uint8x16_t a)
10319 uint8x16_t result;
10320 __asm__ ("mvn %0.16b,%1.16b"
10321 : "=w"(result)
10322 : "w"(a)
10323 : /* No clobbers */);
10324 return result;
10327 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10328 vmvnq_u16 (uint16x8_t a)
10330 uint16x8_t result;
10331 __asm__ ("mvn %0.16b,%1.16b"
10332 : "=w"(result)
10333 : "w"(a)
10334 : /* No clobbers */);
10335 return result;
10338 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10339 vmvnq_u32 (uint32x4_t a)
10341 uint32x4_t result;
10342 __asm__ ("mvn %0.16b,%1.16b"
10343 : "=w"(result)
10344 : "w"(a)
10345 : /* No clobbers */);
10346 return result;
/* vneg[q]_<t>: lane-wise negation (NEG for integers, FNEG for floats).
   NOTE(review): for integer lanes, NEG of the minimum value wraps to
   itself (two's complement), matching the hardware instruction.  */
10349 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10350 vneg_f32 (float32x2_t a)
10352 float32x2_t result;
10353 __asm__ ("fneg %0.2s,%1.2s"
10354 : "=w"(result)
10355 : "w"(a)
10356 : /* No clobbers */);
10357 return result;
10360 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10361 vneg_s8 (int8x8_t a)
10363 int8x8_t result;
10364 __asm__ ("neg %0.8b,%1.8b"
10365 : "=w"(result)
10366 : "w"(a)
10367 : /* No clobbers */);
10368 return result;
10371 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10372 vneg_s16 (int16x4_t a)
10374 int16x4_t result;
10375 __asm__ ("neg %0.4h,%1.4h"
10376 : "=w"(result)
10377 : "w"(a)
10378 : /* No clobbers */);
10379 return result;
10382 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10383 vneg_s32 (int32x2_t a)
10385 int32x2_t result;
10386 __asm__ ("neg %0.2s,%1.2s"
10387 : "=w"(result)
10388 : "w"(a)
10389 : /* No clobbers */);
10390 return result;
10393 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10394 vnegq_f32 (float32x4_t a)
10396 float32x4_t result;
10397 __asm__ ("fneg %0.4s,%1.4s"
10398 : "=w"(result)
10399 : "w"(a)
10400 : /* No clobbers */);
10401 return result;
10404 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10405 vnegq_f64 (float64x2_t a)
10407 float64x2_t result;
10408 __asm__ ("fneg %0.2d,%1.2d"
10409 : "=w"(result)
10410 : "w"(a)
10411 : /* No clobbers */);
10412 return result;
10415 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10416 vnegq_s8 (int8x16_t a)
10418 int8x16_t result;
10419 __asm__ ("neg %0.16b,%1.16b"
10420 : "=w"(result)
10421 : "w"(a)
10422 : /* No clobbers */);
10423 return result;
10426 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10427 vnegq_s16 (int16x8_t a)
10429 int16x8_t result;
10430 __asm__ ("neg %0.8h,%1.8h"
10431 : "=w"(result)
10432 : "w"(a)
10433 : /* No clobbers */);
10434 return result;
10437 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10438 vnegq_s32 (int32x4_t a)
10440 int32x4_t result;
10441 __asm__ ("neg %0.4s,%1.4s"
10442 : "=w"(result)
10443 : "w"(a)
10444 : /* No clobbers */);
10445 return result;
10448 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10449 vnegq_s64 (int64x2_t a)
10451 int64x2_t result;
10452 __asm__ ("neg %0.2d,%1.2d"
10453 : "=w"(result)
10454 : "w"(a)
10455 : /* No clobbers */);
10456 return result;
/* vpadal[q]_<t>: pairwise add-accumulate long (SADALP/UADALP).  Adjacent
   element pairs of B are widened, summed, and ACCUMULATED into A; the
   "0" constraint ties A to the output register since the instruction
   reads and writes its destination.  */
10459 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10460 vpadal_s8 (int16x4_t a, int8x8_t b)
10462 int16x4_t result;
10463 __asm__ ("sadalp %0.4h,%2.8b"
10464 : "=w"(result)
10465 : "0"(a), "w"(b)
10466 : /* No clobbers */);
10467 return result;
10470 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10471 vpadal_s16 (int32x2_t a, int16x4_t b)
10473 int32x2_t result;
10474 __asm__ ("sadalp %0.2s,%2.4h"
10475 : "=w"(result)
10476 : "0"(a), "w"(b)
10477 : /* No clobbers */);
10478 return result;
10481 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10482 vpadal_s32 (int64x1_t a, int32x2_t b)
10484 int64x1_t result;
10485 __asm__ ("sadalp %0.1d,%2.2s"
10486 : "=w"(result)
10487 : "0"(a), "w"(b)
10488 : /* No clobbers */);
10489 return result;
10492 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10493 vpadal_u8 (uint16x4_t a, uint8x8_t b)
10495 uint16x4_t result;
10496 __asm__ ("uadalp %0.4h,%2.8b"
10497 : "=w"(result)
10498 : "0"(a), "w"(b)
10499 : /* No clobbers */);
10500 return result;
10503 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10504 vpadal_u16 (uint32x2_t a, uint16x4_t b)
10506 uint32x2_t result;
10507 __asm__ ("uadalp %0.2s,%2.4h"
10508 : "=w"(result)
10509 : "0"(a), "w"(b)
10510 : /* No clobbers */);
10511 return result;
10514 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10515 vpadal_u32 (uint64x1_t a, uint32x2_t b)
10517 uint64x1_t result;
10518 __asm__ ("uadalp %0.1d,%2.2s"
10519 : "=w"(result)
10520 : "0"(a), "w"(b)
10521 : /* No clobbers */);
10522 return result;
10525 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10526 vpadalq_s8 (int16x8_t a, int8x16_t b)
10528 int16x8_t result;
10529 __asm__ ("sadalp %0.8h,%2.16b"
10530 : "=w"(result)
10531 : "0"(a), "w"(b)
10532 : /* No clobbers */);
10533 return result;
10536 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10537 vpadalq_s16 (int32x4_t a, int16x8_t b)
10539 int32x4_t result;
10540 __asm__ ("sadalp %0.4s,%2.8h"
10541 : "=w"(result)
10542 : "0"(a), "w"(b)
10543 : /* No clobbers */);
10544 return result;
10547 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10548 vpadalq_s32 (int64x2_t a, int32x4_t b)
10550 int64x2_t result;
10551 __asm__ ("sadalp %0.2d,%2.4s"
10552 : "=w"(result)
10553 : "0"(a), "w"(b)
10554 : /* No clobbers */);
10555 return result;
10558 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10559 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
10561 uint16x8_t result;
10562 __asm__ ("uadalp %0.8h,%2.16b"
10563 : "=w"(result)
10564 : "0"(a), "w"(b)
10565 : /* No clobbers */);
10566 return result;
10569 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10570 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
10572 uint32x4_t result;
10573 __asm__ ("uadalp %0.4s,%2.8h"
10574 : "=w"(result)
10575 : "0"(a), "w"(b)
10576 : /* No clobbers */);
10577 return result;
10580 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10581 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
10583 uint64x2_t result;
10584 __asm__ ("uadalp %0.2d,%2.4s"
10585 : "=w"(result)
10586 : "0"(a), "w"(b)
10587 : /* No clobbers */);
10588 return result;
/* vpadd_<t>: pairwise add of adjacent elements across the concatenation
   of A and B.  The float form uses inline asm (FADDP); the integer forms
   go through the compiler's addp builtins, with casts for the unsigned
   variants since the builtins are declared on signed vector types.  */
10591 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10592 vpadd_f32 (float32x2_t a, float32x2_t b)
10594 float32x2_t result;
10595 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
10596 : "=w"(result)
10597 : "w"(a), "w"(b)
10598 : /* No clobbers */);
10599 return result;
10602 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10603 vpadd_s8 (int8x8_t __a, int8x8_t __b)
10605 return __builtin_aarch64_addpv8qi (__a, __b);
10608 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10609 vpadd_s16 (int16x4_t __a, int16x4_t __b)
10611 return __builtin_aarch64_addpv4hi (__a, __b);
10614 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10615 vpadd_s32 (int32x2_t __a, int32x2_t __b)
10617 return __builtin_aarch64_addpv2si (__a, __b);
10620 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10621 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
10623 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
10624 (int8x8_t) __b);
10627 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10628 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
10630 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
10631 (int16x4_t) __b);
10634 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10635 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
10637 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
10638 (int32x2_t) __b);
/* vpaddd_f64: reduce a float64x2_t to scalar by adding its two lanes.  */
10641 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10642 vpaddd_f64 (float64x2_t a)
10644 float64_t result;
10645 __asm__ ("faddp %d0,%1.2d"
10646 : "=w"(result)
10647 : "w"(a)
10648 : /* No clobbers */);
10649 return result;
/* vpaddl[q]_<t>: pairwise add long (SADDLP/UADDLP).  Adjacent element
   pairs are widened and summed; result has half the lanes at twice the
   width.  Unlike vpadal there is no accumulation.  */
10652 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10653 vpaddl_s8 (int8x8_t a)
10655 int16x4_t result;
10656 __asm__ ("saddlp %0.4h,%1.8b"
10657 : "=w"(result)
10658 : "w"(a)
10659 : /* No clobbers */);
10660 return result;
10663 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10664 vpaddl_s16 (int16x4_t a)
10666 int32x2_t result;
10667 __asm__ ("saddlp %0.2s,%1.4h"
10668 : "=w"(result)
10669 : "w"(a)
10670 : /* No clobbers */);
10671 return result;
10674 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10675 vpaddl_s32 (int32x2_t a)
10677 int64x1_t result;
10678 __asm__ ("saddlp %0.1d,%1.2s"
10679 : "=w"(result)
10680 : "w"(a)
10681 : /* No clobbers */);
10682 return result;
10685 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10686 vpaddl_u8 (uint8x8_t a)
10688 uint16x4_t result;
10689 __asm__ ("uaddlp %0.4h,%1.8b"
10690 : "=w"(result)
10691 : "w"(a)
10692 : /* No clobbers */);
10693 return result;
10696 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10697 vpaddl_u16 (uint16x4_t a)
10699 uint32x2_t result;
10700 __asm__ ("uaddlp %0.2s,%1.4h"
10701 : "=w"(result)
10702 : "w"(a)
10703 : /* No clobbers */);
10704 return result;
10707 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10708 vpaddl_u32 (uint32x2_t a)
10710 uint64x1_t result;
10711 __asm__ ("uaddlp %0.1d,%1.2s"
10712 : "=w"(result)
10713 : "w"(a)
10714 : /* No clobbers */);
10715 return result;
10718 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10719 vpaddlq_s8 (int8x16_t a)
10721 int16x8_t result;
10722 __asm__ ("saddlp %0.8h,%1.16b"
10723 : "=w"(result)
10724 : "w"(a)
10725 : /* No clobbers */);
10726 return result;
10729 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10730 vpaddlq_s16 (int16x8_t a)
10732 int32x4_t result;
10733 __asm__ ("saddlp %0.4s,%1.8h"
10734 : "=w"(result)
10735 : "w"(a)
10736 : /* No clobbers */);
10737 return result;
10740 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10741 vpaddlq_s32 (int32x4_t a)
10743 int64x2_t result;
10744 __asm__ ("saddlp %0.2d,%1.4s"
10745 : "=w"(result)
10746 : "w"(a)
10747 : /* No clobbers */);
10748 return result;
10751 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10752 vpaddlq_u8 (uint8x16_t a)
10754 uint16x8_t result;
10755 __asm__ ("uaddlp %0.8h,%1.16b"
10756 : "=w"(result)
10757 : "w"(a)
10758 : /* No clobbers */);
10759 return result;
10762 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10763 vpaddlq_u16 (uint16x8_t a)
10765 uint32x4_t result;
10766 __asm__ ("uaddlp %0.4s,%1.8h"
10767 : "=w"(result)
10768 : "w"(a)
10769 : /* No clobbers */);
10770 return result;
10773 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10774 vpaddlq_u32 (uint32x4_t a)
10776 uint64x2_t result;
10777 __asm__ ("uaddlp %0.2d,%1.4s"
10778 : "=w"(result)
10779 : "w"(a)
10780 : /* No clobbers */);
10781 return result;
/* vpaddq_<t>: 128-bit pairwise add of adjacent elements across the
   concatenation of A and B (ADDP/FADDP).  */
10784 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10785 vpaddq_f32 (float32x4_t a, float32x4_t b)
10787 float32x4_t result;
10788 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
10789 : "=w"(result)
10790 : "w"(a), "w"(b)
10791 : /* No clobbers */);
10792 return result;
10795 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10796 vpaddq_f64 (float64x2_t a, float64x2_t b)
10798 float64x2_t result;
10799 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
10800 : "=w"(result)
10801 : "w"(a), "w"(b)
10802 : /* No clobbers */);
10803 return result;
10806 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10807 vpaddq_s8 (int8x16_t a, int8x16_t b)
10809 int8x16_t result;
10810 __asm__ ("addp %0.16b,%1.16b,%2.16b"
10811 : "=w"(result)
10812 : "w"(a), "w"(b)
10813 : /* No clobbers */);
10814 return result;
10817 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10818 vpaddq_s16 (int16x8_t a, int16x8_t b)
10820 int16x8_t result;
10821 __asm__ ("addp %0.8h,%1.8h,%2.8h"
10822 : "=w"(result)
10823 : "w"(a), "w"(b)
10824 : /* No clobbers */);
10825 return result;
10828 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10829 vpaddq_s32 (int32x4_t a, int32x4_t b)
10831 int32x4_t result;
10832 __asm__ ("addp %0.4s,%1.4s,%2.4s"
10833 : "=w"(result)
10834 : "w"(a), "w"(b)
10835 : /* No clobbers */);
10836 return result;
10839 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10840 vpaddq_s64 (int64x2_t a, int64x2_t b)
10842 int64x2_t result;
10843 __asm__ ("addp %0.2d,%1.2d,%2.2d"
10844 : "=w"(result)
10845 : "w"(a), "w"(b)
10846 : /* No clobbers */);
10847 return result;
10850 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10851 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
10853 uint8x16_t result;
10854 __asm__ ("addp %0.16b,%1.16b,%2.16b"
10855 : "=w"(result)
10856 : "w"(a), "w"(b)
10857 : /* No clobbers */);
10858 return result;
10861 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10862 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
10864 uint16x8_t result;
10865 __asm__ ("addp %0.8h,%1.8h,%2.8h"
10866 : "=w"(result)
10867 : "w"(a), "w"(b)
10868 : /* No clobbers */);
10869 return result;
10872 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10873 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
10875 uint32x4_t result;
10876 __asm__ ("addp %0.4s,%1.4s,%2.4s"
10877 : "=w"(result)
10878 : "w"(a), "w"(b)
10879 : /* No clobbers */);
10880 return result;
10883 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10884 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
10886 uint64x2_t result;
10887 __asm__ ("addp %0.2d,%1.2d,%2.2d"
10888 : "=w"(result)
10889 : "w"(a), "w"(b)
10890 : /* No clobbers */);
10891 return result;
10894 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10895 vpadds_f32 (float32x2_t a)
10897 float32_t result;
10898 __asm__ ("faddp %s0,%1.2s"
10899 : "=w"(result)
10900 : "w"(a)
10901 : /* No clobbers */);
10902 return result;
10905 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10906 vpmax_f32 (float32x2_t a, float32x2_t b)
10908 float32x2_t result;
10909 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
10910 : "=w"(result)
10911 : "w"(a), "w"(b)
10912 : /* No clobbers */);
10913 return result;
10916 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10917 vpmax_s8 (int8x8_t a, int8x8_t b)
10919 int8x8_t result;
10920 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
10921 : "=w"(result)
10922 : "w"(a), "w"(b)
10923 : /* No clobbers */);
10924 return result;
10927 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10928 vpmax_s16 (int16x4_t a, int16x4_t b)
10930 int16x4_t result;
10931 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
10932 : "=w"(result)
10933 : "w"(a), "w"(b)
10934 : /* No clobbers */);
10935 return result;
10938 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10939 vpmax_s32 (int32x2_t a, int32x2_t b)
10941 int32x2_t result;
10942 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
10943 : "=w"(result)
10944 : "w"(a), "w"(b)
10945 : /* No clobbers */);
10946 return result;
10949 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10950 vpmax_u8 (uint8x8_t a, uint8x8_t b)
10952 uint8x8_t result;
10953 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
10954 : "=w"(result)
10955 : "w"(a), "w"(b)
10956 : /* No clobbers */);
10957 return result;
10960 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10961 vpmax_u16 (uint16x4_t a, uint16x4_t b)
10963 uint16x4_t result;
10964 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
10965 : "=w"(result)
10966 : "w"(a), "w"(b)
10967 : /* No clobbers */);
10968 return result;
10971 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10972 vpmax_u32 (uint32x2_t a, uint32x2_t b)
10974 uint32x2_t result;
10975 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
10976 : "=w"(result)
10977 : "w"(a), "w"(b)
10978 : /* No clobbers */);
10979 return result;
10982 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10983 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
10985 float32x2_t result;
10986 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
10987 : "=w"(result)
10988 : "w"(a), "w"(b)
10989 : /* No clobbers */);
10990 return result;
10993 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10994 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
10996 float32x4_t result;
10997 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
10998 : "=w"(result)
10999 : "w"(a), "w"(b)
11000 : /* No clobbers */);
11001 return result;
11004 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11005 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
11007 float64x2_t result;
11008 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
11009 : "=w"(result)
11010 : "w"(a), "w"(b)
11011 : /* No clobbers */);
11012 return result;
11015 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11016 vpmaxnmqd_f64 (float64x2_t a)
11018 float64_t result;
11019 __asm__ ("fmaxnmp %d0,%1.2d"
11020 : "=w"(result)
11021 : "w"(a)
11022 : /* No clobbers */);
11023 return result;
11026 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11027 vpmaxnms_f32 (float32x2_t a)
11029 float32_t result;
11030 __asm__ ("fmaxnmp %s0,%1.2s"
11031 : "=w"(result)
11032 : "w"(a)
11033 : /* No clobbers */);
11034 return result;
11037 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11038 vpmaxq_f32 (float32x4_t a, float32x4_t b)
11040 float32x4_t result;
11041 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
11042 : "=w"(result)
11043 : "w"(a), "w"(b)
11044 : /* No clobbers */);
11045 return result;
11048 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11049 vpmaxq_f64 (float64x2_t a, float64x2_t b)
11051 float64x2_t result;
11052 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
11053 : "=w"(result)
11054 : "w"(a), "w"(b)
11055 : /* No clobbers */);
11056 return result;
11059 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11060 vpmaxq_s8 (int8x16_t a, int8x16_t b)
11062 int8x16_t result;
11063 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
11064 : "=w"(result)
11065 : "w"(a), "w"(b)
11066 : /* No clobbers */);
11067 return result;
11070 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11071 vpmaxq_s16 (int16x8_t a, int16x8_t b)
11073 int16x8_t result;
11074 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
11075 : "=w"(result)
11076 : "w"(a), "w"(b)
11077 : /* No clobbers */);
11078 return result;
11081 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11082 vpmaxq_s32 (int32x4_t a, int32x4_t b)
11084 int32x4_t result;
11085 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
11086 : "=w"(result)
11087 : "w"(a), "w"(b)
11088 : /* No clobbers */);
11089 return result;
11092 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11093 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
11095 uint8x16_t result;
11096 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
11097 : "=w"(result)
11098 : "w"(a), "w"(b)
11099 : /* No clobbers */);
11100 return result;
11103 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11104 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
11106 uint16x8_t result;
11107 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
11108 : "=w"(result)
11109 : "w"(a), "w"(b)
11110 : /* No clobbers */);
11111 return result;
11114 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11115 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
11117 uint32x4_t result;
11118 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
11119 : "=w"(result)
11120 : "w"(a), "w"(b)
11121 : /* No clobbers */);
11122 return result;
11125 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11126 vpmaxqd_f64 (float64x2_t a)
11128 float64_t result;
11129 __asm__ ("fmaxp %d0,%1.2d"
11130 : "=w"(result)
11131 : "w"(a)
11132 : /* No clobbers */);
11133 return result;
11136 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11137 vpmaxs_f32 (float32x2_t a)
11139 float32_t result;
11140 __asm__ ("fmaxp %s0,%1.2s"
11141 : "=w"(result)
11142 : "w"(a)
11143 : /* No clobbers */);
11144 return result;
11147 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11148 vpmin_f32 (float32x2_t a, float32x2_t b)
11150 float32x2_t result;
11151 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
11152 : "=w"(result)
11153 : "w"(a), "w"(b)
11154 : /* No clobbers */);
11155 return result;
11158 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11159 vpmin_s8 (int8x8_t a, int8x8_t b)
11161 int8x8_t result;
11162 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
11163 : "=w"(result)
11164 : "w"(a), "w"(b)
11165 : /* No clobbers */);
11166 return result;
11169 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11170 vpmin_s16 (int16x4_t a, int16x4_t b)
11172 int16x4_t result;
11173 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
11174 : "=w"(result)
11175 : "w"(a), "w"(b)
11176 : /* No clobbers */);
11177 return result;
11180 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11181 vpmin_s32 (int32x2_t a, int32x2_t b)
11183 int32x2_t result;
11184 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
11185 : "=w"(result)
11186 : "w"(a), "w"(b)
11187 : /* No clobbers */);
11188 return result;
11191 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11192 vpmin_u8 (uint8x8_t a, uint8x8_t b)
11194 uint8x8_t result;
11195 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
11196 : "=w"(result)
11197 : "w"(a), "w"(b)
11198 : /* No clobbers */);
11199 return result;
11202 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11203 vpmin_u16 (uint16x4_t a, uint16x4_t b)
11205 uint16x4_t result;
11206 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
11207 : "=w"(result)
11208 : "w"(a), "w"(b)
11209 : /* No clobbers */);
11210 return result;
11213 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11214 vpmin_u32 (uint32x2_t a, uint32x2_t b)
11216 uint32x2_t result;
11217 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
11218 : "=w"(result)
11219 : "w"(a), "w"(b)
11220 : /* No clobbers */);
11221 return result;
11224 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11225 vpminnm_f32 (float32x2_t a, float32x2_t b)
11227 float32x2_t result;
11228 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
11229 : "=w"(result)
11230 : "w"(a), "w"(b)
11231 : /* No clobbers */);
11232 return result;
11235 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11236 vpminnmq_f32 (float32x4_t a, float32x4_t b)
11238 float32x4_t result;
11239 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
11240 : "=w"(result)
11241 : "w"(a), "w"(b)
11242 : /* No clobbers */);
11243 return result;
11246 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11247 vpminnmq_f64 (float64x2_t a, float64x2_t b)
11249 float64x2_t result;
11250 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
11251 : "=w"(result)
11252 : "w"(a), "w"(b)
11253 : /* No clobbers */);
11254 return result;
11257 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11258 vpminnmqd_f64 (float64x2_t a)
11260 float64_t result;
11261 __asm__ ("fminnmp %d0,%1.2d"
11262 : "=w"(result)
11263 : "w"(a)
11264 : /* No clobbers */);
11265 return result;
11268 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11269 vpminnms_f32 (float32x2_t a)
11271 float32_t result;
11272 __asm__ ("fminnmp %s0,%1.2s"
11273 : "=w"(result)
11274 : "w"(a)
11275 : /* No clobbers */);
11276 return result;
11279 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11280 vpminq_f32 (float32x4_t a, float32x4_t b)
11282 float32x4_t result;
11283 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
11284 : "=w"(result)
11285 : "w"(a), "w"(b)
11286 : /* No clobbers */);
11287 return result;
11290 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11291 vpminq_f64 (float64x2_t a, float64x2_t b)
11293 float64x2_t result;
11294 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
11295 : "=w"(result)
11296 : "w"(a), "w"(b)
11297 : /* No clobbers */);
11298 return result;
11301 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11302 vpminq_s8 (int8x16_t a, int8x16_t b)
11304 int8x16_t result;
11305 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
11306 : "=w"(result)
11307 : "w"(a), "w"(b)
11308 : /* No clobbers */);
11309 return result;
11312 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11313 vpminq_s16 (int16x8_t a, int16x8_t b)
11315 int16x8_t result;
11316 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
11317 : "=w"(result)
11318 : "w"(a), "w"(b)
11319 : /* No clobbers */);
11320 return result;
11323 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11324 vpminq_s32 (int32x4_t a, int32x4_t b)
11326 int32x4_t result;
11327 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
11328 : "=w"(result)
11329 : "w"(a), "w"(b)
11330 : /* No clobbers */);
11331 return result;
11334 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11335 vpminq_u8 (uint8x16_t a, uint8x16_t b)
11337 uint8x16_t result;
11338 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
11339 : "=w"(result)
11340 : "w"(a), "w"(b)
11341 : /* No clobbers */);
11342 return result;
11345 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11346 vpminq_u16 (uint16x8_t a, uint16x8_t b)
11348 uint16x8_t result;
11349 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
11350 : "=w"(result)
11351 : "w"(a), "w"(b)
11352 : /* No clobbers */);
11353 return result;
11356 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11357 vpminq_u32 (uint32x4_t a, uint32x4_t b)
11359 uint32x4_t result;
11360 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
11361 : "=w"(result)
11362 : "w"(a), "w"(b)
11363 : /* No clobbers */);
11364 return result;
11367 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11368 vpminqd_f64 (float64x2_t a)
11370 float64_t result;
11371 __asm__ ("fminp %d0,%1.2d"
11372 : "=w"(result)
11373 : "w"(a)
11374 : /* No clobbers */);
11375 return result;
11378 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11379 vpmins_f32 (float32x2_t a)
11381 float32_t result;
11382 __asm__ ("fminp %s0,%1.2s"
11383 : "=w"(result)
11384 : "w"(a)
11385 : /* No clobbers */);
11386 return result;
11389 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11390 vqdmulh_n_s16 (int16x4_t a, int16_t b)
11392 int16x4_t result;
11393 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
11394 : "=w"(result)
11395 : "w"(a), "w"(b)
11396 : /* No clobbers */);
11397 return result;
11400 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11401 vqdmulh_n_s32 (int32x2_t a, int32_t b)
11403 int32x2_t result;
11404 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
11405 : "=w"(result)
11406 : "w"(a), "w"(b)
11407 : /* No clobbers */);
11408 return result;
11411 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11412 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
11414 int16x8_t result;
11415 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
11416 : "=w"(result)
11417 : "w"(a), "w"(b)
11418 : /* No clobbers */);
11419 return result;
11422 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11423 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
11425 int32x4_t result;
11426 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
11427 : "=w"(result)
11428 : "w"(a), "w"(b)
11429 : /* No clobbers */);
11430 return result;
11433 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11434 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
11436 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11437 __asm__ ("sqxtn2 %0.16b, %1.8h"
11438 : "+w"(result)
11439 : "w"(b)
11440 : /* No clobbers */);
11441 return result;
11444 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11445 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
11447 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11448 __asm__ ("sqxtn2 %0.8h, %1.4s"
11449 : "+w"(result)
11450 : "w"(b)
11451 : /* No clobbers */);
11452 return result;
11455 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11456 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
11458 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11459 __asm__ ("sqxtn2 %0.4s, %1.2d"
11460 : "+w"(result)
11461 : "w"(b)
11462 : /* No clobbers */);
11463 return result;
11466 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11467 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
11469 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11470 __asm__ ("uqxtn2 %0.16b, %1.8h"
11471 : "+w"(result)
11472 : "w"(b)
11473 : /* No clobbers */);
11474 return result;
11477 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11478 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
11480 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11481 __asm__ ("uqxtn2 %0.8h, %1.4s"
11482 : "+w"(result)
11483 : "w"(b)
11484 : /* No clobbers */);
11485 return result;
11488 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11489 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
11491 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11492 __asm__ ("uqxtn2 %0.4s, %1.2d"
11493 : "+w"(result)
11494 : "w"(b)
11495 : /* No clobbers */);
11496 return result;
11499 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11500 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
11502 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11503 __asm__ ("sqxtun2 %0.16b, %1.8h"
11504 : "+w"(result)
11505 : "w"(b)
11506 : /* No clobbers */);
11507 return result;
11510 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11511 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
11513 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11514 __asm__ ("sqxtun2 %0.8h, %1.4s"
11515 : "+w"(result)
11516 : "w"(b)
11517 : /* No clobbers */);
11518 return result;
11521 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11522 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
11524 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11525 __asm__ ("sqxtun2 %0.4s, %1.2d"
11526 : "+w"(result)
11527 : "w"(b)
11528 : /* No clobbers */);
11529 return result;
11532 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11533 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
11535 int16x4_t result;
11536 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
11537 : "=w"(result)
11538 : "w"(a), "x"(b)
11539 : /* No clobbers */);
11540 return result;
11543 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11544 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
11546 int32x2_t result;
11547 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
11548 : "=w"(result)
11549 : "w"(a), "w"(b)
11550 : /* No clobbers */);
11551 return result;
11554 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11555 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
11557 int16x8_t result;
11558 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
11559 : "=w"(result)
11560 : "w"(a), "x"(b)
11561 : /* No clobbers */);
11562 return result;
11565 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11566 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
11568 int32x4_t result;
11569 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
11570 : "=w"(result)
11571 : "w"(a), "w"(b)
11572 : /* No clobbers */);
11573 return result;
/* Saturating rounding shift right narrow to high half (SQRSHRN2/UQRSHRN2).
   These are macros because the shift count c must be an assemble-time
   immediate ("i" constraint).  a supplies the low half of the result.  */
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Signed-to-unsigned saturating rounding shift right narrow to high half
   (SQRSHRUN2).  Macro form: c must be an assemble-time immediate.  */
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Saturating shift right narrow to high half (SQSHRN2/UQSHRN2),
   non-rounding variant.  Macro form: c must be an assemble-time
   immediate.  */
#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Signed-to-unsigned saturating shift right narrow to high half
   (SQSHRUN2), non-rounding variant.  Macro form: c must be an
   assemble-time immediate.  */
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
11846 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11847 vrbit_s8 (int8x8_t a)
11849 int8x8_t result;
11850 __asm__ ("rbit %0.8b,%1.8b"
11851 : "=w"(result)
11852 : "w"(a)
11853 : /* No clobbers */);
11854 return result;
11857 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11858 vrbit_u8 (uint8x8_t a)
11860 uint8x8_t result;
11861 __asm__ ("rbit %0.8b,%1.8b"
11862 : "=w"(result)
11863 : "w"(a)
11864 : /* No clobbers */);
11865 return result;
11868 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11869 vrbitq_s8 (int8x16_t a)
11871 int8x16_t result;
11872 __asm__ ("rbit %0.16b,%1.16b"
11873 : "=w"(result)
11874 : "w"(a)
11875 : /* No clobbers */);
11876 return result;
11879 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11880 vrbitq_u8 (uint8x16_t a)
11882 uint8x16_t result;
11883 __asm__ ("rbit %0.16b,%1.16b"
11884 : "=w"(result)
11885 : "w"(a)
11886 : /* No clobbers */);
11887 return result;
11890 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11891 vrecpe_u32 (uint32x2_t a)
11893 uint32x2_t result;
11894 __asm__ ("urecpe %0.2s,%1.2s"
11895 : "=w"(result)
11896 : "w"(a)
11897 : /* No clobbers */);
11898 return result;
11901 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11902 vrecpeq_u32 (uint32x4_t a)
11904 uint32x4_t result;
11905 __asm__ ("urecpe %0.4s,%1.4s"
11906 : "=w"(result)
11907 : "w"(a)
11908 : /* No clobbers */);
11909 return result;
11912 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11913 vrev16_p8 (poly8x8_t a)
11915 poly8x8_t result;
11916 __asm__ ("rev16 %0.8b,%1.8b"
11917 : "=w"(result)
11918 : "w"(a)
11919 : /* No clobbers */);
11920 return result;
11923 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11924 vrev16_s8 (int8x8_t a)
11926 int8x8_t result;
11927 __asm__ ("rev16 %0.8b,%1.8b"
11928 : "=w"(result)
11929 : "w"(a)
11930 : /* No clobbers */);
11931 return result;
11934 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11935 vrev16_u8 (uint8x8_t a)
11937 uint8x8_t result;
11938 __asm__ ("rev16 %0.8b,%1.8b"
11939 : "=w"(result)
11940 : "w"(a)
11941 : /* No clobbers */);
11942 return result;
11945 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11946 vrev16q_p8 (poly8x16_t a)
11948 poly8x16_t result;
11949 __asm__ ("rev16 %0.16b,%1.16b"
11950 : "=w"(result)
11951 : "w"(a)
11952 : /* No clobbers */);
11953 return result;
11956 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11957 vrev16q_s8 (int8x16_t a)
11959 int8x16_t result;
11960 __asm__ ("rev16 %0.16b,%1.16b"
11961 : "=w"(result)
11962 : "w"(a)
11963 : /* No clobbers */);
11964 return result;
11967 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11968 vrev16q_u8 (uint8x16_t a)
11970 uint8x16_t result;
11971 __asm__ ("rev16 %0.16b,%1.16b"
11972 : "=w"(result)
11973 : "w"(a)
11974 : /* No clobbers */);
11975 return result;
11978 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11979 vrev32_p8 (poly8x8_t a)
11981 poly8x8_t result;
11982 __asm__ ("rev32 %0.8b,%1.8b"
11983 : "=w"(result)
11984 : "w"(a)
11985 : /* No clobbers */);
11986 return result;
11989 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
11990 vrev32_p16 (poly16x4_t a)
11992 poly16x4_t result;
11993 __asm__ ("rev32 %0.4h,%1.4h"
11994 : "=w"(result)
11995 : "w"(a)
11996 : /* No clobbers */);
11997 return result;
12000 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12001 vrev32_s8 (int8x8_t a)
12003 int8x8_t result;
12004 __asm__ ("rev32 %0.8b,%1.8b"
12005 : "=w"(result)
12006 : "w"(a)
12007 : /* No clobbers */);
12008 return result;
12011 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12012 vrev32_s16 (int16x4_t a)
12014 int16x4_t result;
12015 __asm__ ("rev32 %0.4h,%1.4h"
12016 : "=w"(result)
12017 : "w"(a)
12018 : /* No clobbers */);
12019 return result;
12022 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12023 vrev32_u8 (uint8x8_t a)
12025 uint8x8_t result;
12026 __asm__ ("rev32 %0.8b,%1.8b"
12027 : "=w"(result)
12028 : "w"(a)
12029 : /* No clobbers */);
12030 return result;
12033 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12034 vrev32_u16 (uint16x4_t a)
12036 uint16x4_t result;
12037 __asm__ ("rev32 %0.4h,%1.4h"
12038 : "=w"(result)
12039 : "w"(a)
12040 : /* No clobbers */);
12041 return result;
12044 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12045 vrev32q_p8 (poly8x16_t a)
12047 poly8x16_t result;
12048 __asm__ ("rev32 %0.16b,%1.16b"
12049 : "=w"(result)
12050 : "w"(a)
12051 : /* No clobbers */);
12052 return result;
12055 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12056 vrev32q_p16 (poly16x8_t a)
12058 poly16x8_t result;
12059 __asm__ ("rev32 %0.8h,%1.8h"
12060 : "=w"(result)
12061 : "w"(a)
12062 : /* No clobbers */);
12063 return result;
12066 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12067 vrev32q_s8 (int8x16_t a)
12069 int8x16_t result;
12070 __asm__ ("rev32 %0.16b,%1.16b"
12071 : "=w"(result)
12072 : "w"(a)
12073 : /* No clobbers */);
12074 return result;
12077 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12078 vrev32q_s16 (int16x8_t a)
12080 int16x8_t result;
12081 __asm__ ("rev32 %0.8h,%1.8h"
12082 : "=w"(result)
12083 : "w"(a)
12084 : /* No clobbers */);
12085 return result;
12088 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12089 vrev32q_u8 (uint8x16_t a)
12091 uint8x16_t result;
12092 __asm__ ("rev32 %0.16b,%1.16b"
12093 : "=w"(result)
12094 : "w"(a)
12095 : /* No clobbers */);
12096 return result;
12099 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12100 vrev32q_u16 (uint16x8_t a)
12102 uint16x8_t result;
12103 __asm__ ("rev32 %0.8h,%1.8h"
12104 : "=w"(result)
12105 : "w"(a)
12106 : /* No clobbers */);
12107 return result;
12110 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12111 vrev64_f32 (float32x2_t a)
12113 float32x2_t result;
12114 __asm__ ("rev64 %0.2s,%1.2s"
12115 : "=w"(result)
12116 : "w"(a)
12117 : /* No clobbers */);
12118 return result;
12121 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12122 vrev64_p8 (poly8x8_t a)
12124 poly8x8_t result;
12125 __asm__ ("rev64 %0.8b,%1.8b"
12126 : "=w"(result)
12127 : "w"(a)
12128 : /* No clobbers */);
12129 return result;
12132 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12133 vrev64_p16 (poly16x4_t a)
12135 poly16x4_t result;
12136 __asm__ ("rev64 %0.4h,%1.4h"
12137 : "=w"(result)
12138 : "w"(a)
12139 : /* No clobbers */);
12140 return result;
12143 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12144 vrev64_s8 (int8x8_t a)
12146 int8x8_t result;
12147 __asm__ ("rev64 %0.8b,%1.8b"
12148 : "=w"(result)
12149 : "w"(a)
12150 : /* No clobbers */);
12151 return result;
12154 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12155 vrev64_s16 (int16x4_t a)
12157 int16x4_t result;
12158 __asm__ ("rev64 %0.4h,%1.4h"
12159 : "=w"(result)
12160 : "w"(a)
12161 : /* No clobbers */);
12162 return result;
12165 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12166 vrev64_s32 (int32x2_t a)
12168 int32x2_t result;
12169 __asm__ ("rev64 %0.2s,%1.2s"
12170 : "=w"(result)
12171 : "w"(a)
12172 : /* No clobbers */);
12173 return result;
12176 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12177 vrev64_u8 (uint8x8_t a)
12179 uint8x8_t result;
12180 __asm__ ("rev64 %0.8b,%1.8b"
12181 : "=w"(result)
12182 : "w"(a)
12183 : /* No clobbers */);
12184 return result;
12187 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12188 vrev64_u16 (uint16x4_t a)
12190 uint16x4_t result;
12191 __asm__ ("rev64 %0.4h,%1.4h"
12192 : "=w"(result)
12193 : "w"(a)
12194 : /* No clobbers */);
12195 return result;
12198 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12199 vrev64_u32 (uint32x2_t a)
12201 uint32x2_t result;
12202 __asm__ ("rev64 %0.2s,%1.2s"
12203 : "=w"(result)
12204 : "w"(a)
12205 : /* No clobbers */);
12206 return result;
12209 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12210 vrev64q_f32 (float32x4_t a)
12212 float32x4_t result;
12213 __asm__ ("rev64 %0.4s,%1.4s"
12214 : "=w"(result)
12215 : "w"(a)
12216 : /* No clobbers */);
12217 return result;
12220 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12221 vrev64q_p8 (poly8x16_t a)
12223 poly8x16_t result;
12224 __asm__ ("rev64 %0.16b,%1.16b"
12225 : "=w"(result)
12226 : "w"(a)
12227 : /* No clobbers */);
12228 return result;
12231 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12232 vrev64q_p16 (poly16x8_t a)
12234 poly16x8_t result;
12235 __asm__ ("rev64 %0.8h,%1.8h"
12236 : "=w"(result)
12237 : "w"(a)
12238 : /* No clobbers */);
12239 return result;
12242 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12243 vrev64q_s8 (int8x16_t a)
12245 int8x16_t result;
12246 __asm__ ("rev64 %0.16b,%1.16b"
12247 : "=w"(result)
12248 : "w"(a)
12249 : /* No clobbers */);
12250 return result;
12253 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12254 vrev64q_s16 (int16x8_t a)
12256 int16x8_t result;
12257 __asm__ ("rev64 %0.8h,%1.8h"
12258 : "=w"(result)
12259 : "w"(a)
12260 : /* No clobbers */);
12261 return result;
12264 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12265 vrev64q_s32 (int32x4_t a)
12267 int32x4_t result;
12268 __asm__ ("rev64 %0.4s,%1.4s"
12269 : "=w"(result)
12270 : "w"(a)
12271 : /* No clobbers */);
12272 return result;
12275 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12276 vrev64q_u8 (uint8x16_t a)
12278 uint8x16_t result;
12279 __asm__ ("rev64 %0.16b,%1.16b"
12280 : "=w"(result)
12281 : "w"(a)
12282 : /* No clobbers */);
12283 return result;
12286 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12287 vrev64q_u16 (uint16x8_t a)
12289 uint16x8_t result;
12290 __asm__ ("rev64 %0.8h,%1.8h"
12291 : "=w"(result)
12292 : "w"(a)
12293 : /* No clobbers */);
12294 return result;
12297 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12298 vrev64q_u32 (uint32x4_t a)
12300 uint32x4_t result;
12301 __asm__ ("rev64 %0.4s,%1.4s"
12302 : "=w"(result)
12303 : "w"(a)
12304 : /* No clobbers */);
12305 return result;
/* vrshrn_high_n_<type>(a, b, c): rounding shift right narrow, high half
   (RSHRN2).  The low half of the 128-bit result is taken from A; the
   high half receives B shifted right by immediate C with rounding and
   narrowed to half element width.  Macros because C must be an
   assemble-time immediate.  */

#define vrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 (a_, vcreate_s8 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 (a_, vcreate_s16 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 (a_, vcreate_s32 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 (a_, vcreate_u8 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 (a_, vcreate_u16 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 (a_, vcreate_u32 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vrshrn_n_<type>(a, b): rounding shift right narrow (RSHRN) — shift
   each element of A right by immediate B with rounding and narrow to
   half element width.  Macros because B must be an immediate.  */

#define vrshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
12470 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12471 vrsqrte_f32 (float32x2_t a)
12473 float32x2_t result;
12474 __asm__ ("frsqrte %0.2s,%1.2s"
12475 : "=w"(result)
12476 : "w"(a)
12477 : /* No clobbers */);
12478 return result;
12481 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12482 vrsqrte_f64 (float64x1_t a)
12484 float64x1_t result;
12485 __asm__ ("frsqrte %d0,%d1"
12486 : "=w"(result)
12487 : "w"(a)
12488 : /* No clobbers */);
12489 return result;
12492 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12493 vrsqrte_u32 (uint32x2_t a)
12495 uint32x2_t result;
12496 __asm__ ("ursqrte %0.2s,%1.2s"
12497 : "=w"(result)
12498 : "w"(a)
12499 : /* No clobbers */);
12500 return result;
12503 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12504 vrsqrted_f64 (float64_t a)
12506 float64_t result;
12507 __asm__ ("frsqrte %d0,%d1"
12508 : "=w"(result)
12509 : "w"(a)
12510 : /* No clobbers */);
12511 return result;
12514 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12515 vrsqrteq_f32 (float32x4_t a)
12517 float32x4_t result;
12518 __asm__ ("frsqrte %0.4s,%1.4s"
12519 : "=w"(result)
12520 : "w"(a)
12521 : /* No clobbers */);
12522 return result;
12525 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12526 vrsqrteq_f64 (float64x2_t a)
12528 float64x2_t result;
12529 __asm__ ("frsqrte %0.2d,%1.2d"
12530 : "=w"(result)
12531 : "w"(a)
12532 : /* No clobbers */);
12533 return result;
12536 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12537 vrsqrteq_u32 (uint32x4_t a)
12539 uint32x4_t result;
12540 __asm__ ("ursqrte %0.4s,%1.4s"
12541 : "=w"(result)
12542 : "w"(a)
12543 : /* No clobbers */);
12544 return result;
12547 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12548 vrsqrtes_f32 (float32_t a)
12550 float32_t result;
12551 __asm__ ("frsqrte %s0,%s1"
12552 : "=w"(result)
12553 : "w"(a)
12554 : /* No clobbers */);
12555 return result;
12558 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12559 vrsqrts_f32 (float32x2_t a, float32x2_t b)
12561 float32x2_t result;
12562 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
12563 : "=w"(result)
12564 : "w"(a), "w"(b)
12565 : /* No clobbers */);
12566 return result;
12569 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12570 vrsqrtsd_f64 (float64_t a, float64_t b)
12572 float64_t result;
12573 __asm__ ("frsqrts %d0,%d1,%d2"
12574 : "=w"(result)
12575 : "w"(a), "w"(b)
12576 : /* No clobbers */);
12577 return result;
12580 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12581 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
12583 float32x4_t result;
12584 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
12585 : "=w"(result)
12586 : "w"(a), "w"(b)
12587 : /* No clobbers */);
12588 return result;
12591 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12592 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
12594 float64x2_t result;
12595 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
12596 : "=w"(result)
12597 : "w"(a), "w"(b)
12598 : /* No clobbers */);
12599 return result;
12602 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12603 vrsqrtss_f32 (float32_t a, float32_t b)
12605 float32_t result;
12606 __asm__ ("frsqrts %s0,%s1,%s2"
12607 : "=w"(result)
12608 : "w"(a), "w"(b)
12609 : /* No clobbers */);
12610 return result;
12613 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12614 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
12616 float64x2_t result;
12617 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
12618 : "=w"(result)
12619 : "w"(a), "w"(b)
12620 : /* No clobbers */);
12621 return result;
12624 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12625 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
12627 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12628 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
12629 : "+w"(result)
12630 : "w"(b), "w"(c)
12631 : /* No clobbers */);
12632 return result;
12635 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12636 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
12638 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
12639 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
12640 : "+w"(result)
12641 : "w"(b), "w"(c)
12642 : /* No clobbers */);
12643 return result;
12646 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12647 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
12649 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
12650 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
12651 : "+w"(result)
12652 : "w"(b), "w"(c)
12653 : /* No clobbers */);
12654 return result;
12657 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12658 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
12660 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12661 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
12662 : "+w"(result)
12663 : "w"(b), "w"(c)
12664 : /* No clobbers */);
12665 return result;
12668 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12669 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
12671 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
12672 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
12673 : "+w"(result)
12674 : "w"(b), "w"(c)
12675 : /* No clobbers */);
12676 return result;
12679 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12680 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
12682 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
12683 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
12684 : "+w"(result)
12685 : "w"(b), "w"(c)
12686 : /* No clobbers */);
12687 return result;
12690 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12691 vrsubhn_s16 (int16x8_t a, int16x8_t b)
12693 int8x8_t result;
12694 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
12695 : "=w"(result)
12696 : "w"(a), "w"(b)
12697 : /* No clobbers */);
12698 return result;
12701 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12702 vrsubhn_s32 (int32x4_t a, int32x4_t b)
12704 int16x4_t result;
12705 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
12706 : "=w"(result)
12707 : "w"(a), "w"(b)
12708 : /* No clobbers */);
12709 return result;
12712 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12713 vrsubhn_s64 (int64x2_t a, int64x2_t b)
12715 int32x2_t result;
12716 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
12717 : "=w"(result)
12718 : "w"(a), "w"(b)
12719 : /* No clobbers */);
12720 return result;
12723 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12724 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
12726 uint8x8_t result;
12727 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
12728 : "=w"(result)
12729 : "w"(a), "w"(b)
12730 : /* No clobbers */);
12731 return result;
12734 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12735 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
12737 uint16x4_t result;
12738 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
12739 : "=w"(result)
12740 : "w"(a), "w"(b)
12741 : /* No clobbers */);
12742 return result;
12745 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12746 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
12748 uint32x2_t result;
12749 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
12750 : "=w"(result)
12751 : "w"(a), "w"(b)
12752 : /* No clobbers */);
12753 return result;
/* vset_lane_<type>(a, b, c): return a copy of vector B with lane C
   replaced by scalar A (INS from a general register).  Macros because
   the lane index C must be an assemble-time immediate.  */

#define vset_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vsetq_lane_<type>(a, b, c): 128-bit variants of vset_lane — return a
   copy of vector B with lane C replaced by scalar A (INS).  Macros
   because the lane index C must be an assemble-time immediate.  */

#define vsetq_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vshrn_high_n_<type>(a, b, c): shift right narrow, high half (SHRN2,
   truncating — compare the rounding vrshrn_high_n_* above).  The low
   half of the result comes from A; the high half receives B shifted
   right by immediate C and narrowed.  */

#define vshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 (a_, vcreate_s8 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 (a_, vcreate_s16 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 (a_, vcreate_s32 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 (a_, vcreate_u8 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 (a_, vcreate_u16 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 (a_, vcreate_u32 (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vshrn_n_<type>(a, b): shift right narrow (SHRN, truncating) — shift
   each element of A right by immediate B and narrow to half width.  */

#define vshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vsli<q>_n_p<8,16>(a, b, c): shift left and insert (SLI) — shift each
   element of B left by immediate C, inserting the result into A while
   preserving A's bits below the shift amount.  */

#define vsli_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sli %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsli_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sli %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sli %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sli %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vsri<q>_n_p<8,16>(a, b, c): shift right and insert (SRI) — shift each
   element of B right by immediate C, inserting the result into A while
   preserving A's top C bits.  */

#define vsri_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sri %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsri_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sri %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sri %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sri %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vst1_lane_<type>(a, b, c): store lane C of vector B to the address A
   (ST1 single-structure form).  The asm has no outputs; "memory" in
   the clobber list tells the compiler about the store.  Macros because
   the lane index C must be an assemble-time immediate.  */

#define vst1_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* vst1q_lane_<t> (ptr, vec, lane): Q-register (128-bit vector) variants
   of vst1_lane — store element LANE of VEC to *PTR via ST1 (single
   structure).  Macros, because the lane index must be an immediate
   ("i" constraint).  No asm outputs; "memory" clobber makes the store
   visible to the compiler.  */
#define vst1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
13599 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13600 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
13602 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
13603 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
13604 : "+w"(result)
13605 : "w"(b), "w"(c)
13606 : /* No clobbers */);
13607 return result;
13610 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13611 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
13613 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
13614 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
13615 : "+w"(result)
13616 : "w"(b), "w"(c)
13617 : /* No clobbers */);
13618 return result;
13621 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13622 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
13624 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
13625 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
13626 : "+w"(result)
13627 : "w"(b), "w"(c)
13628 : /* No clobbers */);
13629 return result;
13632 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13633 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
13635 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
13636 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
13637 : "+w"(result)
13638 : "w"(b), "w"(c)
13639 : /* No clobbers */);
13640 return result;
13643 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13644 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
13646 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
13647 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
13648 : "+w"(result)
13649 : "w"(b), "w"(c)
13650 : /* No clobbers */);
13651 return result;
13654 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13655 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
13657 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
13658 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
13659 : "+w"(result)
13660 : "w"(b), "w"(c)
13661 : /* No clobbers */);
13662 return result;
13665 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13666 vsubhn_s16 (int16x8_t a, int16x8_t b)
13668 int8x8_t result;
13669 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
13670 : "=w"(result)
13671 : "w"(a), "w"(b)
13672 : /* No clobbers */);
13673 return result;
13676 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13677 vsubhn_s32 (int32x4_t a, int32x4_t b)
13679 int16x4_t result;
13680 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
13681 : "=w"(result)
13682 : "w"(a), "w"(b)
13683 : /* No clobbers */);
13684 return result;
13687 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13688 vsubhn_s64 (int64x2_t a, int64x2_t b)
13690 int32x2_t result;
13691 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
13692 : "=w"(result)
13693 : "w"(a), "w"(b)
13694 : /* No clobbers */);
13695 return result;
13698 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13699 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
13701 uint8x8_t result;
13702 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
13703 : "=w"(result)
13704 : "w"(a), "w"(b)
13705 : /* No clobbers */);
13706 return result;
13709 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13710 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
13712 uint16x4_t result;
13713 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
13714 : "=w"(result)
13715 : "w"(a), "w"(b)
13716 : /* No clobbers */);
13717 return result;
13720 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13721 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
13723 uint32x2_t result;
13724 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
13725 : "=w"(result)
13726 : "w"(a), "w"(b)
13727 : /* No clobbers */);
13728 return result;
13731 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13732 vtrn1_f32 (float32x2_t a, float32x2_t b)
13734 float32x2_t result;
13735 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
13736 : "=w"(result)
13737 : "w"(a), "w"(b)
13738 : /* No clobbers */);
13739 return result;
13742 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13743 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
13745 poly8x8_t result;
13746 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
13747 : "=w"(result)
13748 : "w"(a), "w"(b)
13749 : /* No clobbers */);
13750 return result;
13753 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13754 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
13756 poly16x4_t result;
13757 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
13758 : "=w"(result)
13759 : "w"(a), "w"(b)
13760 : /* No clobbers */);
13761 return result;
13764 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13765 vtrn1_s8 (int8x8_t a, int8x8_t b)
13767 int8x8_t result;
13768 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
13769 : "=w"(result)
13770 : "w"(a), "w"(b)
13771 : /* No clobbers */);
13772 return result;
13775 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13776 vtrn1_s16 (int16x4_t a, int16x4_t b)
13778 int16x4_t result;
13779 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
13780 : "=w"(result)
13781 : "w"(a), "w"(b)
13782 : /* No clobbers */);
13783 return result;
13786 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13787 vtrn1_s32 (int32x2_t a, int32x2_t b)
13789 int32x2_t result;
13790 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
13791 : "=w"(result)
13792 : "w"(a), "w"(b)
13793 : /* No clobbers */);
13794 return result;
13797 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13798 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
13800 uint8x8_t result;
13801 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
13802 : "=w"(result)
13803 : "w"(a), "w"(b)
13804 : /* No clobbers */);
13805 return result;
13808 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13809 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
13811 uint16x4_t result;
13812 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
13813 : "=w"(result)
13814 : "w"(a), "w"(b)
13815 : /* No clobbers */);
13816 return result;
13819 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13820 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
13822 uint32x2_t result;
13823 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
13824 : "=w"(result)
13825 : "w"(a), "w"(b)
13826 : /* No clobbers */);
13827 return result;
13830 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13831 vtrn1q_f32 (float32x4_t a, float32x4_t b)
13833 float32x4_t result;
13834 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
13835 : "=w"(result)
13836 : "w"(a), "w"(b)
13837 : /* No clobbers */);
13838 return result;
13841 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13842 vtrn1q_f64 (float64x2_t a, float64x2_t b)
13844 float64x2_t result;
13845 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
13846 : "=w"(result)
13847 : "w"(a), "w"(b)
13848 : /* No clobbers */);
13849 return result;
13852 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13853 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
13855 poly8x16_t result;
13856 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
13857 : "=w"(result)
13858 : "w"(a), "w"(b)
13859 : /* No clobbers */);
13860 return result;
13863 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13864 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
13866 poly16x8_t result;
13867 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
13868 : "=w"(result)
13869 : "w"(a), "w"(b)
13870 : /* No clobbers */);
13871 return result;
13874 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13875 vtrn1q_s8 (int8x16_t a, int8x16_t b)
13877 int8x16_t result;
13878 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
13879 : "=w"(result)
13880 : "w"(a), "w"(b)
13881 : /* No clobbers */);
13882 return result;
13885 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13886 vtrn1q_s16 (int16x8_t a, int16x8_t b)
13888 int16x8_t result;
13889 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
13890 : "=w"(result)
13891 : "w"(a), "w"(b)
13892 : /* No clobbers */);
13893 return result;
13896 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13897 vtrn1q_s32 (int32x4_t a, int32x4_t b)
13899 int32x4_t result;
13900 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
13901 : "=w"(result)
13902 : "w"(a), "w"(b)
13903 : /* No clobbers */);
13904 return result;
13907 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13908 vtrn1q_s64 (int64x2_t a, int64x2_t b)
13910 int64x2_t result;
13911 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
13912 : "=w"(result)
13913 : "w"(a), "w"(b)
13914 : /* No clobbers */);
13915 return result;
13918 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13919 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
13921 uint8x16_t result;
13922 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
13923 : "=w"(result)
13924 : "w"(a), "w"(b)
13925 : /* No clobbers */);
13926 return result;
13929 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13930 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
13932 uint16x8_t result;
13933 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
13934 : "=w"(result)
13935 : "w"(a), "w"(b)
13936 : /* No clobbers */);
13937 return result;
13940 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13941 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
13943 uint32x4_t result;
13944 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
13945 : "=w"(result)
13946 : "w"(a), "w"(b)
13947 : /* No clobbers */);
13948 return result;
13951 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13952 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
13954 uint64x2_t result;
13955 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
13956 : "=w"(result)
13957 : "w"(a), "w"(b)
13958 : /* No clobbers */);
13959 return result;
13962 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13963 vtrn2_f32 (float32x2_t a, float32x2_t b)
13965 float32x2_t result;
13966 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
13967 : "=w"(result)
13968 : "w"(a), "w"(b)
13969 : /* No clobbers */);
13970 return result;
13973 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13974 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
13976 poly8x8_t result;
13977 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
13978 : "=w"(result)
13979 : "w"(a), "w"(b)
13980 : /* No clobbers */);
13981 return result;
13984 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13985 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
13987 poly16x4_t result;
13988 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
13989 : "=w"(result)
13990 : "w"(a), "w"(b)
13991 : /* No clobbers */);
13992 return result;
13995 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13996 vtrn2_s8 (int8x8_t a, int8x8_t b)
13998 int8x8_t result;
13999 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14000 : "=w"(result)
14001 : "w"(a), "w"(b)
14002 : /* No clobbers */);
14003 return result;
14006 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14007 vtrn2_s16 (int16x4_t a, int16x4_t b)
14009 int16x4_t result;
14010 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14011 : "=w"(result)
14012 : "w"(a), "w"(b)
14013 : /* No clobbers */);
14014 return result;
14017 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14018 vtrn2_s32 (int32x2_t a, int32x2_t b)
14020 int32x2_t result;
14021 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14022 : "=w"(result)
14023 : "w"(a), "w"(b)
14024 : /* No clobbers */);
14025 return result;
14028 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14029 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
14031 uint8x8_t result;
14032 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14033 : "=w"(result)
14034 : "w"(a), "w"(b)
14035 : /* No clobbers */);
14036 return result;
14039 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14040 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
14042 uint16x4_t result;
14043 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14044 : "=w"(result)
14045 : "w"(a), "w"(b)
14046 : /* No clobbers */);
14047 return result;
14050 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14051 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
14053 uint32x2_t result;
14054 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14055 : "=w"(result)
14056 : "w"(a), "w"(b)
14057 : /* No clobbers */);
14058 return result;
14061 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14062 vtrn2q_f32 (float32x4_t a, float32x4_t b)
14064 float32x4_t result;
14065 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14066 : "=w"(result)
14067 : "w"(a), "w"(b)
14068 : /* No clobbers */);
14069 return result;
14072 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14073 vtrn2q_f64 (float64x2_t a, float64x2_t b)
14075 float64x2_t result;
14076 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14077 : "=w"(result)
14078 : "w"(a), "w"(b)
14079 : /* No clobbers */);
14080 return result;
14083 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14084 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
14086 poly8x16_t result;
14087 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14088 : "=w"(result)
14089 : "w"(a), "w"(b)
14090 : /* No clobbers */);
14091 return result;
14094 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14095 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
14097 poly16x8_t result;
14098 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14099 : "=w"(result)
14100 : "w"(a), "w"(b)
14101 : /* No clobbers */);
14102 return result;
14105 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14106 vtrn2q_s8 (int8x16_t a, int8x16_t b)
14108 int8x16_t result;
14109 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14110 : "=w"(result)
14111 : "w"(a), "w"(b)
14112 : /* No clobbers */);
14113 return result;
14116 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14117 vtrn2q_s16 (int16x8_t a, int16x8_t b)
14119 int16x8_t result;
14120 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14121 : "=w"(result)
14122 : "w"(a), "w"(b)
14123 : /* No clobbers */);
14124 return result;
14127 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14128 vtrn2q_s32 (int32x4_t a, int32x4_t b)
14130 int32x4_t result;
14131 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14132 : "=w"(result)
14133 : "w"(a), "w"(b)
14134 : /* No clobbers */);
14135 return result;
14138 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14139 vtrn2q_s64 (int64x2_t a, int64x2_t b)
14141 int64x2_t result;
14142 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14143 : "=w"(result)
14144 : "w"(a), "w"(b)
14145 : /* No clobbers */);
14146 return result;
14149 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14150 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
14152 uint8x16_t result;
14153 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14154 : "=w"(result)
14155 : "w"(a), "w"(b)
14156 : /* No clobbers */);
14157 return result;
14160 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14161 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
14163 uint16x8_t result;
14164 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14165 : "=w"(result)
14166 : "w"(a), "w"(b)
14167 : /* No clobbers */);
14168 return result;
14171 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14172 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
14174 uint32x4_t result;
14175 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14176 : "=w"(result)
14177 : "w"(a), "w"(b)
14178 : /* No clobbers */);
14179 return result;
14182 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14183 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
14185 uint64x2_t result;
14186 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14187 : "=w"(result)
14188 : "w"(a), "w"(b)
14189 : /* No clobbers */);
14190 return result;
14193 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14194 vtst_p8 (poly8x8_t a, poly8x8_t b)
14196 uint8x8_t result;
14197 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
14198 : "=w"(result)
14199 : "w"(a), "w"(b)
14200 : /* No clobbers */);
14201 return result;
14204 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14205 vtst_p16 (poly16x4_t a, poly16x4_t b)
14207 uint16x4_t result;
14208 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
14209 : "=w"(result)
14210 : "w"(a), "w"(b)
14211 : /* No clobbers */);
14212 return result;
14215 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14216 vtstq_p8 (poly8x16_t a, poly8x16_t b)
14218 uint8x16_t result;
14219 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
14220 : "=w"(result)
14221 : "w"(a), "w"(b)
14222 : /* No clobbers */);
14223 return result;
14226 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14227 vtstq_p16 (poly16x8_t a, poly16x8_t b)
14229 uint16x8_t result;
14230 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
14231 : "=w"(result)
14232 : "w"(a), "w"(b)
14233 : /* No clobbers */);
14234 return result;
14236 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14237 vuzp1_f32 (float32x2_t a, float32x2_t b)
14239 float32x2_t result;
14240 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
14241 : "=w"(result)
14242 : "w"(a), "w"(b)
14243 : /* No clobbers */);
14244 return result;
14247 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14248 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
14250 poly8x8_t result;
14251 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
14252 : "=w"(result)
14253 : "w"(a), "w"(b)
14254 : /* No clobbers */);
14255 return result;
14258 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14259 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
14261 poly16x4_t result;
14262 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
14263 : "=w"(result)
14264 : "w"(a), "w"(b)
14265 : /* No clobbers */);
14266 return result;
14269 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14270 vuzp1_s8 (int8x8_t a, int8x8_t b)
14272 int8x8_t result;
14273 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
14274 : "=w"(result)
14275 : "w"(a), "w"(b)
14276 : /* No clobbers */);
14277 return result;
14280 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14281 vuzp1_s16 (int16x4_t a, int16x4_t b)
14283 int16x4_t result;
14284 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
14285 : "=w"(result)
14286 : "w"(a), "w"(b)
14287 : /* No clobbers */);
14288 return result;
14291 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14292 vuzp1_s32 (int32x2_t a, int32x2_t b)
14294 int32x2_t result;
14295 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
14296 : "=w"(result)
14297 : "w"(a), "w"(b)
14298 : /* No clobbers */);
14299 return result;
14302 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14303 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
14305 uint8x8_t result;
14306 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
14307 : "=w"(result)
14308 : "w"(a), "w"(b)
14309 : /* No clobbers */);
14310 return result;
14313 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14314 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
14316 uint16x4_t result;
14317 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
14318 : "=w"(result)
14319 : "w"(a), "w"(b)
14320 : /* No clobbers */);
14321 return result;
14324 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14325 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
14327 uint32x2_t result;
14328 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
14329 : "=w"(result)
14330 : "w"(a), "w"(b)
14331 : /* No clobbers */);
14332 return result;
14335 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14336 vuzp1q_f32 (float32x4_t a, float32x4_t b)
14338 float32x4_t result;
14339 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
14340 : "=w"(result)
14341 : "w"(a), "w"(b)
14342 : /* No clobbers */);
14343 return result;
14346 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14347 vuzp1q_f64 (float64x2_t a, float64x2_t b)
14349 float64x2_t result;
14350 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
14351 : "=w"(result)
14352 : "w"(a), "w"(b)
14353 : /* No clobbers */);
14354 return result;
14357 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14358 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
14360 poly8x16_t result;
14361 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
14362 : "=w"(result)
14363 : "w"(a), "w"(b)
14364 : /* No clobbers */);
14365 return result;
14368 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14369 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
14371 poly16x8_t result;
14372 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
14373 : "=w"(result)
14374 : "w"(a), "w"(b)
14375 : /* No clobbers */);
14376 return result;
14379 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14380 vuzp1q_s8 (int8x16_t a, int8x16_t b)
14382 int8x16_t result;
14383 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
14384 : "=w"(result)
14385 : "w"(a), "w"(b)
14386 : /* No clobbers */);
14387 return result;
14390 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14391 vuzp1q_s16 (int16x8_t a, int16x8_t b)
14393 int16x8_t result;
14394 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
14395 : "=w"(result)
14396 : "w"(a), "w"(b)
14397 : /* No clobbers */);
14398 return result;
14401 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14402 vuzp1q_s32 (int32x4_t a, int32x4_t b)
14404 int32x4_t result;
14405 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
14406 : "=w"(result)
14407 : "w"(a), "w"(b)
14408 : /* No clobbers */);
14409 return result;
14412 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14413 vuzp1q_s64 (int64x2_t a, int64x2_t b)
14415 int64x2_t result;
14416 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
14417 : "=w"(result)
14418 : "w"(a), "w"(b)
14419 : /* No clobbers */);
14420 return result;
14423 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14424 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
14426 uint8x16_t result;
14427 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
14428 : "=w"(result)
14429 : "w"(a), "w"(b)
14430 : /* No clobbers */);
14431 return result;
14434 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14435 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
14437 uint16x8_t result;
14438 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
14439 : "=w"(result)
14440 : "w"(a), "w"(b)
14441 : /* No clobbers */);
14442 return result;
14445 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14446 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
14448 uint32x4_t result;
14449 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
14450 : "=w"(result)
14451 : "w"(a), "w"(b)
14452 : /* No clobbers */);
14453 return result;
14456 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14457 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
14459 uint64x2_t result;
14460 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
14461 : "=w"(result)
14462 : "w"(a), "w"(b)
14463 : /* No clobbers */);
14464 return result;
14467 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14468 vuzp2_f32 (float32x2_t a, float32x2_t b)
14470 float32x2_t result;
14471 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
14472 : "=w"(result)
14473 : "w"(a), "w"(b)
14474 : /* No clobbers */);
14475 return result;
14478 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14479 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
14481 poly8x8_t result;
14482 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
14483 : "=w"(result)
14484 : "w"(a), "w"(b)
14485 : /* No clobbers */);
14486 return result;
14489 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14490 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
14492 poly16x4_t result;
14493 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
14494 : "=w"(result)
14495 : "w"(a), "w"(b)
14496 : /* No clobbers */);
14497 return result;
14500 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14501 vuzp2_s8 (int8x8_t a, int8x8_t b)
14503 int8x8_t result;
14504 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
14505 : "=w"(result)
14506 : "w"(a), "w"(b)
14507 : /* No clobbers */);
14508 return result;
14511 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14512 vuzp2_s16 (int16x4_t a, int16x4_t b)
14514 int16x4_t result;
14515 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
14516 : "=w"(result)
14517 : "w"(a), "w"(b)
14518 : /* No clobbers */);
14519 return result;
14522 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14523 vuzp2_s32 (int32x2_t a, int32x2_t b)
14525 int32x2_t result;
14526 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
14527 : "=w"(result)
14528 : "w"(a), "w"(b)
14529 : /* No clobbers */);
14530 return result;
14533 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14534 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
14536 uint8x8_t result;
14537 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
14538 : "=w"(result)
14539 : "w"(a), "w"(b)
14540 : /* No clobbers */);
14541 return result;
14544 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14545 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
14547 uint16x4_t result;
14548 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
14549 : "=w"(result)
14550 : "w"(a), "w"(b)
14551 : /* No clobbers */);
14552 return result;
14555 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14556 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
14558 uint32x2_t result;
14559 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
14560 : "=w"(result)
14561 : "w"(a), "w"(b)
14562 : /* No clobbers */);
14563 return result;
14566 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14567 vuzp2q_f32 (float32x4_t a, float32x4_t b)
14569 float32x4_t result;
14570 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
14571 : "=w"(result)
14572 : "w"(a), "w"(b)
14573 : /* No clobbers */);
14574 return result;
14577 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14578 vuzp2q_f64 (float64x2_t a, float64x2_t b)
14580 float64x2_t result;
14581 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
14582 : "=w"(result)
14583 : "w"(a), "w"(b)
14584 : /* No clobbers */);
14585 return result;
14588 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14589 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
14591 poly8x16_t result;
14592 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
14593 : "=w"(result)
14594 : "w"(a), "w"(b)
14595 : /* No clobbers */);
14596 return result;
/* vuzp2q_<t> (a, b): de-interleave, returning the odd-indexed elements of
   the concatenation of A and B (AArch64 UZP2 instruction), 128-bit ("q")
   variants.  Temporary inline-asm implementations: inputs/output live in
   FP/SIMD registers ("w" constraint); the instruction writes only the
   destination register, hence the empty clobber list.  */
14599 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14600 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
14602 poly16x8_t result;
14603 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
14604 : "=w"(result)
14605 : "w"(a), "w"(b)
14606 : /* No clobbers */);
14607 return result;
14610 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14611 vuzp2q_s8 (int8x16_t a, int8x16_t b)
14613 int8x16_t result;
14614 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
14615 : "=w"(result)
14616 : "w"(a), "w"(b)
14617 : /* No clobbers */);
14618 return result;
14621 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14622 vuzp2q_s16 (int16x8_t a, int16x8_t b)
14624 int16x8_t result;
14625 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
14626 : "=w"(result)
14627 : "w"(a), "w"(b)
14628 : /* No clobbers */);
14629 return result;
14632 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14633 vuzp2q_s32 (int32x4_t a, int32x4_t b)
14635 int32x4_t result;
14636 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
14637 : "=w"(result)
14638 : "w"(a), "w"(b)
14639 : /* No clobbers */);
14640 return result;
14643 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14644 vuzp2q_s64 (int64x2_t a, int64x2_t b)
14646 int64x2_t result;
14647 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
14648 : "=w"(result)
14649 : "w"(a), "w"(b)
14650 : /* No clobbers */);
14651 return result;
14654 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14655 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
14657 uint8x16_t result;
14658 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
14659 : "=w"(result)
14660 : "w"(a), "w"(b)
14661 : /* No clobbers */);
14662 return result;
14665 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14666 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
14668 uint16x8_t result;
14669 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
14670 : "=w"(result)
14671 : "w"(a), "w"(b)
14672 : /* No clobbers */);
14673 return result;
14676 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14677 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
14679 uint32x4_t result;
14680 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
14681 : "=w"(result)
14682 : "w"(a), "w"(b)
14683 : /* No clobbers */);
14684 return result;
14687 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14688 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
14690 uint64x2_t result;
14691 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
14692 : "=w"(result)
14693 : "w"(a), "w"(b)
14694 : /* No clobbers */);
14695 return result;
/* vzip1_<t> (a, b): interleave the low halves of A and B (AArch64 ZIP1
   instruction), 64-bit vector variants.  Temporary inline-asm
   implementations; all operands in FP/SIMD registers, no clobbers.  */
14698 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14699 vzip1_f32 (float32x2_t a, float32x2_t b)
14701 float32x2_t result;
14702 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
14703 : "=w"(result)
14704 : "w"(a), "w"(b)
14705 : /* No clobbers */);
14706 return result;
14709 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14710 vzip1_p8 (poly8x8_t a, poly8x8_t b)
14712 poly8x8_t result;
14713 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
14714 : "=w"(result)
14715 : "w"(a), "w"(b)
14716 : /* No clobbers */);
14717 return result;
14720 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14721 vzip1_p16 (poly16x4_t a, poly16x4_t b)
14723 poly16x4_t result;
14724 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
14725 : "=w"(result)
14726 : "w"(a), "w"(b)
14727 : /* No clobbers */);
14728 return result;
14731 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14732 vzip1_s8 (int8x8_t a, int8x8_t b)
14734 int8x8_t result;
14735 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
14736 : "=w"(result)
14737 : "w"(a), "w"(b)
14738 : /* No clobbers */);
14739 return result;
14742 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14743 vzip1_s16 (int16x4_t a, int16x4_t b)
14745 int16x4_t result;
14746 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
14747 : "=w"(result)
14748 : "w"(a), "w"(b)
14749 : /* No clobbers */);
14750 return result;
14753 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14754 vzip1_s32 (int32x2_t a, int32x2_t b)
14756 int32x2_t result;
14757 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
14758 : "=w"(result)
14759 : "w"(a), "w"(b)
14760 : /* No clobbers */);
14761 return result;
14764 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14765 vzip1_u8 (uint8x8_t a, uint8x8_t b)
14767 uint8x8_t result;
14768 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
14769 : "=w"(result)
14770 : "w"(a), "w"(b)
14771 : /* No clobbers */);
14772 return result;
14775 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14776 vzip1_u16 (uint16x4_t a, uint16x4_t b)
14778 uint16x4_t result;
14779 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
14780 : "=w"(result)
14781 : "w"(a), "w"(b)
14782 : /* No clobbers */);
14783 return result;
14786 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14787 vzip1_u32 (uint32x2_t a, uint32x2_t b)
14789 uint32x2_t result;
14790 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
14791 : "=w"(result)
14792 : "w"(a), "w"(b)
14793 : /* No clobbers */);
14794 return result;
/* vzip1q_<t> (a, b): interleave the low halves of A and B (AArch64 ZIP1
   instruction), 128-bit ("q") variants.  Temporary inline-asm
   implementations; all operands in FP/SIMD registers, no clobbers.  */
14797 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14798 vzip1q_f32 (float32x4_t a, float32x4_t b)
14800 float32x4_t result;
14801 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
14802 : "=w"(result)
14803 : "w"(a), "w"(b)
14804 : /* No clobbers */);
14805 return result;
14808 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14809 vzip1q_f64 (float64x2_t a, float64x2_t b)
14811 float64x2_t result;
14812 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
14813 : "=w"(result)
14814 : "w"(a), "w"(b)
14815 : /* No clobbers */);
14816 return result;
14819 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14820 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
14822 poly8x16_t result;
14823 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
14824 : "=w"(result)
14825 : "w"(a), "w"(b)
14826 : /* No clobbers */);
14827 return result;
14830 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14831 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
14833 poly16x8_t result;
14834 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
14835 : "=w"(result)
14836 : "w"(a), "w"(b)
14837 : /* No clobbers */);
14838 return result;
14841 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14842 vzip1q_s8 (int8x16_t a, int8x16_t b)
14844 int8x16_t result;
14845 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
14846 : "=w"(result)
14847 : "w"(a), "w"(b)
14848 : /* No clobbers */);
14849 return result;
14852 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14853 vzip1q_s16 (int16x8_t a, int16x8_t b)
14855 int16x8_t result;
14856 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
14857 : "=w"(result)
14858 : "w"(a), "w"(b)
14859 : /* No clobbers */);
14860 return result;
14863 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14864 vzip1q_s32 (int32x4_t a, int32x4_t b)
14866 int32x4_t result;
14867 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
14868 : "=w"(result)
14869 : "w"(a), "w"(b)
14870 : /* No clobbers */);
14871 return result;
14874 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14875 vzip1q_s64 (int64x2_t a, int64x2_t b)
14877 int64x2_t result;
14878 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
14879 : "=w"(result)
14880 : "w"(a), "w"(b)
14881 : /* No clobbers */);
14882 return result;
14885 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14886 vzip1q_u8 (uint8x16_t a, uint8x16_t b)
14888 uint8x16_t result;
14889 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
14890 : "=w"(result)
14891 : "w"(a), "w"(b)
14892 : /* No clobbers */);
14893 return result;
14896 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14897 vzip1q_u16 (uint16x8_t a, uint16x8_t b)
14899 uint16x8_t result;
14900 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
14901 : "=w"(result)
14902 : "w"(a), "w"(b)
14903 : /* No clobbers */);
14904 return result;
14907 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14908 vzip1q_u32 (uint32x4_t a, uint32x4_t b)
14910 uint32x4_t result;
14911 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
14912 : "=w"(result)
14913 : "w"(a), "w"(b)
14914 : /* No clobbers */);
14915 return result;
14918 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14919 vzip1q_u64 (uint64x2_t a, uint64x2_t b)
14921 uint64x2_t result;
14922 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
14923 : "=w"(result)
14924 : "w"(a), "w"(b)
14925 : /* No clobbers */);
14926 return result;
/* vzip2_<t> (a, b): interleave the high halves of A and B (AArch64 ZIP2
   instruction), 64-bit vector variants.  Temporary inline-asm
   implementations; all operands in FP/SIMD registers, no clobbers.  */
14929 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14930 vzip2_f32 (float32x2_t a, float32x2_t b)
14932 float32x2_t result;
14933 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
14934 : "=w"(result)
14935 : "w"(a), "w"(b)
14936 : /* No clobbers */);
14937 return result;
14940 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14941 vzip2_p8 (poly8x8_t a, poly8x8_t b)
14943 poly8x8_t result;
14944 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
14945 : "=w"(result)
14946 : "w"(a), "w"(b)
14947 : /* No clobbers */);
14948 return result;
14951 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14952 vzip2_p16 (poly16x4_t a, poly16x4_t b)
14954 poly16x4_t result;
14955 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
14956 : "=w"(result)
14957 : "w"(a), "w"(b)
14958 : /* No clobbers */);
14959 return result;
14962 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14963 vzip2_s8 (int8x8_t a, int8x8_t b)
14965 int8x8_t result;
14966 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
14967 : "=w"(result)
14968 : "w"(a), "w"(b)
14969 : /* No clobbers */);
14970 return result;
14973 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14974 vzip2_s16 (int16x4_t a, int16x4_t b)
14976 int16x4_t result;
14977 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
14978 : "=w"(result)
14979 : "w"(a), "w"(b)
14980 : /* No clobbers */);
14981 return result;
14984 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14985 vzip2_s32 (int32x2_t a, int32x2_t b)
14987 int32x2_t result;
14988 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
14989 : "=w"(result)
14990 : "w"(a), "w"(b)
14991 : /* No clobbers */);
14992 return result;
14995 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14996 vzip2_u8 (uint8x8_t a, uint8x8_t b)
14998 uint8x8_t result;
14999 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15000 : "=w"(result)
15001 : "w"(a), "w"(b)
15002 : /* No clobbers */);
15003 return result;
15006 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15007 vzip2_u16 (uint16x4_t a, uint16x4_t b)
15009 uint16x4_t result;
15010 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15011 : "=w"(result)
15012 : "w"(a), "w"(b)
15013 : /* No clobbers */);
15014 return result;
15017 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15018 vzip2_u32 (uint32x2_t a, uint32x2_t b)
15020 uint32x2_t result;
15021 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15022 : "=w"(result)
15023 : "w"(a), "w"(b)
15024 : /* No clobbers */);
15025 return result;
/* vzip2q_<t> (a, b): interleave the high halves of A and B (AArch64 ZIP2
   instruction), 128-bit ("q") variants.  Temporary inline-asm
   implementations; all operands in FP/SIMD registers, no clobbers.  */
15028 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15029 vzip2q_f32 (float32x4_t a, float32x4_t b)
15031 float32x4_t result;
15032 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15033 : "=w"(result)
15034 : "w"(a), "w"(b)
15035 : /* No clobbers */);
15036 return result;
15039 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15040 vzip2q_f64 (float64x2_t a, float64x2_t b)
15042 float64x2_t result;
15043 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15044 : "=w"(result)
15045 : "w"(a), "w"(b)
15046 : /* No clobbers */);
15047 return result;
15050 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15051 vzip2q_p8 (poly8x16_t a, poly8x16_t b)
15053 poly8x16_t result;
15054 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15055 : "=w"(result)
15056 : "w"(a), "w"(b)
15057 : /* No clobbers */);
15058 return result;
15061 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15062 vzip2q_p16 (poly16x8_t a, poly16x8_t b)
15064 poly16x8_t result;
15065 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15066 : "=w"(result)
15067 : "w"(a), "w"(b)
15068 : /* No clobbers */);
15069 return result;
15072 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15073 vzip2q_s8 (int8x16_t a, int8x16_t b)
15075 int8x16_t result;
15076 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15077 : "=w"(result)
15078 : "w"(a), "w"(b)
15079 : /* No clobbers */);
15080 return result;
15083 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15084 vzip2q_s16 (int16x8_t a, int16x8_t b)
15086 int16x8_t result;
15087 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15088 : "=w"(result)
15089 : "w"(a), "w"(b)
15090 : /* No clobbers */);
15091 return result;
15094 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15095 vzip2q_s32 (int32x4_t a, int32x4_t b)
15097 int32x4_t result;
15098 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15099 : "=w"(result)
15100 : "w"(a), "w"(b)
15101 : /* No clobbers */);
15102 return result;
15105 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15106 vzip2q_s64 (int64x2_t a, int64x2_t b)
15108 int64x2_t result;
15109 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15110 : "=w"(result)
15111 : "w"(a), "w"(b)
15112 : /* No clobbers */);
15113 return result;
15116 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15117 vzip2q_u8 (uint8x16_t a, uint8x16_t b)
15119 uint8x16_t result;
15120 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15121 : "=w"(result)
15122 : "w"(a), "w"(b)
15123 : /* No clobbers */);
15124 return result;
15127 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15128 vzip2q_u16 (uint16x8_t a, uint16x8_t b)
15130 uint16x8_t result;
15131 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15132 : "=w"(result)
15133 : "w"(a), "w"(b)
15134 : /* No clobbers */);
15135 return result;
15138 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15139 vzip2q_u32 (uint32x4_t a, uint32x4_t b)
15141 uint32x4_t result;
15142 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15143 : "=w"(result)
15144 : "w"(a), "w"(b)
15145 : /* No clobbers */);
15146 return result;
15149 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15150 vzip2q_u64 (uint64x2_t a, uint64x2_t b)
15152 uint64x2_t result;
15153 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15154 : "=w"(result)
15155 : "w"(a), "w"(b)
15156 : /* No clobbers */);
15157 return result;
15160 /* End of temporary inline asm implementations. */
15162 /* Start of temporary inline asm for vldn, vstn and friends. */
15164 /* Create struct element types for duplicating loads.
15166 Create 2 element structures of:
15168 +------+----+----+----+----+
15169 | | 8 | 16 | 32 | 64 |
15170 +------+----+----+----+----+
15171 |int | Y | Y | N | N |
15172 +------+----+----+----+----+
15173 |uint | Y | Y | N | N |
15174 +------+----+----+----+----+
15175 |float | - | - | N | N |
15176 +------+----+----+----+----+
15177 |poly | Y | Y | - | - |
15178 +------+----+----+----+----+
15180 Create 3 element structures of:
15182 +------+----+----+----+----+
15183 | | 8 | 16 | 32 | 64 |
15184 +------+----+----+----+----+
15185 |int | Y | Y | Y | Y |
15186 +------+----+----+----+----+
15187 |uint | Y | Y | Y | Y |
15188 +------+----+----+----+----+
15189 |float | - | - | Y | Y |
15190 +------+----+----+----+----+
15191 |poly | Y | Y | - | - |
15192 +------+----+----+----+----+
15194 Create 4 element structures of:
15196 +------+----+----+----+----+
15197 | | 8 | 16 | 32 | 64 |
15198 +------+----+----+----+----+
15199 |int | Y | N | N | Y |
15200 +------+----+----+----+----+
15201 |uint | Y | N | N | Y |
15202 +------+----+----+----+----+
15203 |float | - | - | N | Y |
15204 +------+----+----+----+----+
15205 |poly | Y | N | - | - |
15206 +------+----+----+----+----+
15208 This is required for casting memory reference. */
/* __STRUCTN (t, sz, nelem): define struct type t<sz>x<nelem>_t holding
   'nelem' elements of t<sz>_t in an array 'val'.  These types exist only
   so the vldN_dup/vldN_lane asm below can cast a scalar pointer to a
   memory reference of the exact number of elements read.  Only the
   combinations not already provided elsewhere in this header are
   instantiated (see the coverage tables above).  */
15209 #define __STRUCTN(t, sz, nelem) \
15210 typedef struct t ## sz ## x ## nelem ## _t { \
15211 t ## sz ## _t val[nelem]; \
15212 } t ## sz ## x ## nelem ## _t;
/* 2-element structs (8/16-bit int, uint and poly only).  */
15214 /* 2-element structs. */
15215 __STRUCTN (int, 8, 2)
15216 __STRUCTN (int, 16, 2)
15217 __STRUCTN (uint, 8, 2)
15218 __STRUCTN (uint, 16, 2)
15219 __STRUCTN (poly, 8, 2)
15220 __STRUCTN (poly, 16, 2)
15221 /* 3-element structs. */
15222 __STRUCTN (int, 8, 3)
15223 __STRUCTN (int, 16, 3)
15224 __STRUCTN (int, 32, 3)
15225 __STRUCTN (int, 64, 3)
15226 __STRUCTN (uint, 8, 3)
15227 __STRUCTN (uint, 16, 3)
15228 __STRUCTN (uint, 32, 3)
15229 __STRUCTN (uint, 64, 3)
15230 __STRUCTN (float, 32, 3)
15231 __STRUCTN (float, 64, 3)
15232 __STRUCTN (poly, 8, 3)
15233 __STRUCTN (poly, 16, 3)
15234 /* 4-element structs. */
15235 __STRUCTN (int, 8, 4)
15236 __STRUCTN (int, 64, 4)
15237 __STRUCTN (uint, 8, 4)
15238 __STRUCTN (uint, 64, 4)
15239 __STRUCTN (poly, 8, 4)
15240 __STRUCTN (float, 64, 4)
15241 __undef marker below removes the generator once all types exist.
15241 #undef __STRUCTN
/* __LD2R_FUNC: generate vld2<Q>_dup_<funcsuffix>: load one element pair
   from *ptr and replicate it across both registers of the returned pair
   (LD2R), then spill v16/v17 into the result with ST1.  The 'structtype'
   cast tells the compiler exactly how much of *ptr the "Q" input reads.
   v16/v17 are clobbered explicitly since they are hard-coded in the asm.
   NOTE(review): "memory" clobber plus "Q" operands keep ordering correct;
   hard-coding v16/v17 is why these are flagged as temporary.  */
15243 #define __LD2R_FUNC(rettype, structtype, ptrtype, \
15244 regsuffix, funcsuffix, Q) \
15245 __extension__ static __inline rettype \
15246 __attribute__ ((__always_inline__)) \
15247 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
15249 rettype result; \
15250 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
15251 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
15252 : "=Q"(result) \
15253 : "Q"(*(const structtype *)ptr) \
15254 : "memory", "v16", "v17"); \
15255 return result; \
15258 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
15259 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
15260 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
15261 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
15262 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
15263 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
15264 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
15265 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
15266 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
15267 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
15268 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
15269 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
15270 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
15271 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
15272 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
15273 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
15274 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
15275 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
15276 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
15277 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
15278 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
15279 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
15280 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
15281 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
/* __LD2_LANE_FUNC: generate vld2<Q>_lane_<funcsuffix>: reload the incoming
   pair B into v16/v17 (LD1), overwrite lane C of each register from *ptr
   (LD2 lane form), then store v16/v17 back into the result (ST1).
   'c' must be a compile-time constant ("i" constraint); v16/v17 are
   clobbered explicitly because the asm hard-codes them.  */
15283 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
15284 lnsuffix, funcsuffix, Q) \
15285 __extension__ static __inline rettype \
15286 __attribute__ ((__always_inline__)) \
15287 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
15288 rettype b, const int c) \
15290 rettype result; \
15291 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
15292 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
15293 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
15294 : "=Q"(result) \
15295 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
15296 : "memory", "v16", "v17"); \
15297 return result; \
/* Fix: vld2_lane_s8 must take const int8_t * per ACLE, matching the
   vld2q_lane_s8 instantiation below (was incorrectly uint8_t).  */
15300 __LD2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
15301 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
15302 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
15303 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
15304 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
15305 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
15306 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
15307 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
15308 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
15309 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
15310 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
15311 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
15312 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
15313 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
15314 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
15315 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
15316 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
15317 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
15318 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
15319 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
15320 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
15321 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
15322 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
15323 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* __LD3R_FUNC: generate vld3<Q>_dup_<funcsuffix>: load one element triple
   from *ptr and replicate it across v16-v18 (LD3R), then store the three
   registers into the result with ST1.  The 'structtype' cast sizes the
   "Q" memory input; v16-v18 are clobbered explicitly.  */
15325 #define __LD3R_FUNC(rettype, structtype, ptrtype, \
15326 regsuffix, funcsuffix, Q) \
15327 __extension__ static __inline rettype \
15328 __attribute__ ((__always_inline__)) \
15329 vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
15331 rettype result; \
15332 __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
15333 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
15334 : "=Q"(result) \
15335 : "Q"(*(const structtype *)ptr) \
15336 : "memory", "v16", "v17", "v18"); \
15337 return result; \
15340 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
15341 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
15342 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
15343 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
15344 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
15345 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
15346 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
15347 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
15348 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
15349 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
15350 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
15351 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
15352 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
15353 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
15354 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
15355 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
15356 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
15357 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
15358 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
15359 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
15360 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
15361 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
15362 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
15363 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
/* __LD3_LANE_FUNC: generate vld3<Q>_lane_<funcsuffix>: reload B into
   v16-v18 (LD1), overwrite lane C of each register from *ptr (LD3 lane
   form), then store v16-v18 back into the result (ST1).  'c' must be a
   compile-time constant; v16-v18 are clobbered explicitly.  */
15365 #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
15366 lnsuffix, funcsuffix, Q) \
15367 __extension__ static __inline rettype \
15368 __attribute__ ((__always_inline__)) \
15369 vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
15370 rettype b, const int c) \
15372 rettype result; \
15373 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
15374 "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
15375 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
15376 : "=Q"(result) \
15377 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
15378 : "memory", "v16", "v17", "v18"); \
15379 return result; \
/* Fix: vld3_lane_s8 must take const int8_t * per ACLE, matching the
   vld3q_lane_s8 instantiation below (was incorrectly uint8_t).  */
15382 __LD3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
15383 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
15384 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
15385 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
15386 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
15387 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
15388 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
15389 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
15390 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
15391 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
15392 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
15393 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
15394 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
15395 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
15396 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
15397 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
15398 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
15399 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
15400 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
15401 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
15402 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
15403 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
15404 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
15405 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* __LD4R_FUNC: generate vld4<Q>_dup_<funcsuffix>: load one element
   quadruple from *ptr and replicate it across v16-v19 (LD4R), then store
   the four registers into the result with ST1.  The 'structtype' cast
   sizes the "Q" memory input; v16-v19 are clobbered explicitly.  */
15407 #define __LD4R_FUNC(rettype, structtype, ptrtype, \
15408 regsuffix, funcsuffix, Q) \
15409 __extension__ static __inline rettype \
15410 __attribute__ ((__always_inline__)) \
15411 vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
15413 rettype result; \
15414 __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
15415 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
15416 : "=Q"(result) \
15417 : "Q"(*(const structtype *)ptr) \
15418 : "memory", "v16", "v17", "v18", "v19"); \
15419 return result; \
15422 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
15423 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
15424 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
15425 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
15426 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
15427 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
15428 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
15429 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
15430 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
15431 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
15432 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
15433 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
15434 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
15435 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
15436 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
15437 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
15438 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
15439 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
15440 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
15441 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
15442 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
15443 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
15444 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
15445 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* __LD4_LANE_FUNC: generate vld4<Q>_lane_<funcsuffix>: reload B into
   v16-v19 (LD1), overwrite lane C of each register from *ptr (LD4 lane
   form), then store v16-v19 back into the result (ST1).  'c' must be a
   compile-time constant; v16-v19 are clobbered explicitly.  */
15447 #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
15448 lnsuffix, funcsuffix, Q) \
15449 __extension__ static __inline rettype \
15450 __attribute__ ((__always_inline__)) \
15451 vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
15452 rettype b, const int c) \
15454 rettype result; \
15455 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
15456 "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
15457 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
15458 : "=Q"(result) \
15459 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
15460 : "memory", "v16", "v17", "v18", "v19"); \
15461 return result; \
/* Fix: vld4_lane_s8 must take const int8_t * per ACLE, matching the
   vld4q_lane_s8 instantiation below (was incorrectly uint8_t).  */
15464 __LD4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
15465 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
15466 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
15467 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
15468 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
15469 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
15470 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
15471 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
15472 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
15473 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
15474 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
15475 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
15476 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
15477 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
15478 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
15479 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
15480 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
15481 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
15482 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
15483 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
15484 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
15485 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
15486 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
15487 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* __ST2_LANE_FUNC: generate vst2<Q>_lane_<funcsuffix>: load B into
   v16/v17 (LD1), then store lane C of each register to *ptr (ST2 lane
   form).  Writes 2 elements at *ptr; v16/v17 clobbered.
   NOTE(review): the parameter is 'const ptrtype *' but the asm writes
   through '*(intype *) ptr' (const cast away); per ACLE the vstN_lane
   pointer should be non-const — confirm before relying on constness.  */
15489 #define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
15490 lnsuffix, funcsuffix, Q) \
15491 __extension__ static __inline void \
15492 __attribute__ ((__always_inline__)) \
15493 vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
15494 intype b, const int c) \
15496 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
15497 "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
15498 : "=Q"(*(intype *) ptr) \
15499 : "Q"(b), "i"(c) \
15500 : "memory", "v16", "v17"); \
15503 __ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
15504 __ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
15505 __ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
15506 __ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
15507 __ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
15508 __ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
15509 __ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
15510 __ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
15511 __ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
15512 __ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
15513 __ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
15514 __ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
15515 __ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
15516 __ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
15517 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
15518 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
15519 __ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
15520 __ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
15521 __ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
15522 __ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
15523 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
15524 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
15525 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
15526 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* __ST3_LANE_FUNC: generate vst3<Q>_lane_<funcsuffix>: load B into
   v16-v18 (LD1), then store lane C of each register to *ptr (ST3 lane
   form).  Writes 3 elements at *ptr; v16-v18 clobbered.
   NOTE(review): same const-cast-away on the store pointer as
   __ST2_LANE_FUNC — confirm against ACLE before relying on constness.  */
15528 #define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
15529 lnsuffix, funcsuffix, Q) \
15530 __extension__ static __inline void \
15531 __attribute__ ((__always_inline__)) \
15532 vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
15533 intype b, const int c) \
15535 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
15536 "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
15537 : "=Q"(*(intype *) ptr) \
15538 : "Q"(b), "i"(c) \
15539 : "memory", "v16", "v17", "v18"); \
15542 __ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
15543 __ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
15544 __ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
15545 __ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
15546 __ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
15547 __ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
15548 __ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
15549 __ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
15550 __ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
15551 __ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
15552 __ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
15553 __ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
15554 __ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
15555 __ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
15556 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
15557 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
15558 __ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
15559 __ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
15560 __ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
15561 __ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
15562 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
15563 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
15564 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
15565 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* __ST4_LANE_FUNC: generate vst4<Q>_lane_<funcsuffix>: load B into
   v16-v19 (LD1), then store lane C of each register to *ptr (ST4 lane
   form).  Writes 4 elements at *ptr; v16-v19 clobbered.
   NOTE(review): same const-cast-away on the store pointer as
   __ST2_LANE_FUNC — confirm against ACLE before relying on constness.  */
15567 #define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
15568 lnsuffix, funcsuffix, Q) \
15569 __extension__ static __inline void \
15570 __attribute__ ((__always_inline__)) \
15571 vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
15572 intype b, const int c) \
15574 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
15575 "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
15576 : "=Q"(*(intype *) ptr) \
15577 : "Q"(b), "i"(c) \
15578 : "memory", "v16", "v17", "v18", "v19"); \
15581 __ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
15582 __ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
15583 __ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
15584 __ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
15585 __ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
15586 __ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
15587 __ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
15588 __ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
15589 __ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
15590 __ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
15591 __ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
15592 __ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
15593 __ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
15594 __ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
15595 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
15596 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
15597 __ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
15598 __ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
15599 __ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
15600 __ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
15601 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
15602 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
15603 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
15604 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* vaddlv_s32: widening add across vector — SADDLP sign-extends and
   pairwise-adds the two int32 lanes of A into one int64 result.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlv_s32 (int32x2_t a)
{
  int64_t result;
  __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}
/* vaddlv_u32: widening add across vector — UADDLP zero-extends and
   pairwise-adds the two uint32 lanes of A into one uint64 result.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlv_u32 (uint32x2_t a)
{
  uint64_t result;
  __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}
15622 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15623 vpaddd_s64 (int64x2_t __a)
15625 return __builtin_aarch64_addpdi (__a);
15628 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15629 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
15631 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
15634 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15635 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
15637 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
15640 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15641 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
15643 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
15646 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15647 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
15649 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
15652 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15653 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
15655 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
15658 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15659 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
15661 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
15664 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15665 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
15667 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
15670 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15671 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
15673 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
/* Table intrinsics. */
/* vqtbl1[q]: single-table TBL lookup.  Each byte of the index vector B
   selects a byte from the 16-byte table A; TBL yields 0 for any index
   byte >= 16.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
{
  poly8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl1_s8 (int8x16_t a, uint8x8_t b)
{
  int8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

/* 128-bit forms: 16 index bytes, same single 16-byte table.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
{
  poly8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
{
  int8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}
/* vqtbl2[q]: two-table TBL lookup over a 32-byte table.  The pair TAB is
   spilled to memory ("Q" input), reloaded into the fixed registers
   v16/v17, and TBL indexes the concatenated registers; index bytes
   >= 32 yield 0.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}
/* vqtbl3[q]: three-table TBL lookup over a 48-byte table loaded into
   v16-v18; index bytes >= 48 yield 0.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}
/* vqtbl4[q]: four-table TBL lookup over a 64-byte table loaded into
   v16-v19; index bytes >= 64 yield 0.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   : "=w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}
/* vqtbx1[q]: single-table TBX lookup.  Like TBL, but index bytes out of
   range (>= 16) leave the corresponding byte of R unchanged; hence
   RESULT is seeded with R and bound as a "+w" input-output operand.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}
/* vqtbx2[q]: two-table TBX lookup over a 32-byte table in v16/v17;
   out-of-range index bytes (>= 32) leave the byte of R unchanged.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17");
  return result;
}
/* vqtbx3[q]: three-table TBX lookup over a 48-byte table in v16-v18;
   out-of-range index bytes (>= 48) leave the byte of R unchanged.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18");
  return result;
}
/* vqtbx4[q]: four-table TBX lookup over a 64-byte table in v16-v19;
   out-of-range index bytes (>= 64) leave the byte of R unchanged.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   : "+w"(result)
	   : "Q"(tab), "w"(idx)
	   : "memory", "v16", "v17", "v18", "v19");
  return result;
}
/* V7 legacy table intrinsics. */
/* vtbl1: ARMv7-style lookup into an 8-byte table.  The table is
   zero-extended to 16 bytes so that AArch64 TBL returns 0 for index
   bytes in [8,16), matching the v7 out-of-range behaviour.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}
/* vtbl2: v7-style lookup into a pair of 8-byte tables, implemented as a
   single 16-byte AArch64 TBL over the concatenated pair.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}
/* vtbl3: v7-style lookup into three 8-byte tables.  The three halves
   are packed (zero-padded to 32 bytes) into a q-register pair, loaded
   into v16/v17, and looked up with a two-register TBL.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}
/* vtbl4: v7-style lookup into four 8-byte tables, packed into a 32-byte
   q-register pair and looked up with a two-register TBL.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}
16413 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16414 vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
16416 int8x8_t result;
16417 int8x8_t tmp1;
16418 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
16419 __asm__ ("movi %0.8b, 8\n\t"
16420 "cmhs %0.8b, %3.8b, %0.8b\n\t"
16421 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
16422 "bsl %0.8b, %4.8b, %1.8b\n\t"
16423 : "+w"(result), "=w"(tmp1)
16424 : "w"(temp), "w"(idx), "w"(r)
16425 : /* No clobbers */);
16426 return result;
16429 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16430 vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
16432 uint8x8_t result;
16433 uint8x8_t tmp1;
16434 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
16435 __asm__ ("movi %0.8b, 8\n\t"
16436 "cmhs %0.8b, %3.8b, %0.8b\n\t"
16437 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
16438 "bsl %0.8b, %4.8b, %1.8b\n\t"
16439 : "+w"(result), "=w"(tmp1)
16440 : "w"(temp), "w"(idx), "w"(r)
16441 : /* No clobbers */);
16442 return result;
16445 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16446 vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
16448 poly8x8_t result;
16449 poly8x8_t tmp1;
16450 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
16451 __asm__ ("movi %0.8b, 8\n\t"
16452 "cmhs %0.8b, %3.8b, %0.8b\n\t"
16453 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
16454 "bsl %0.8b, %4.8b, %1.8b\n\t"
16455 : "+w"(result), "=w"(tmp1)
16456 : "w"(temp), "w"(idx), "w"(r)
16457 : /* No clobbers */);
16458 return result;
/* vtbx2: v7-style extended lookup into a pair of 8-byte tables.  The
   16-byte combined table means AArch64 TBX's preserve-on-out-of-range
   behaviour matches v7 exactly, so no masking is needed.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}
16497 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16498 vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
16500 int8x8_t result;
16501 int8x8_t tmp1;
16502 int8x16x2_t temp;
16503 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
16504 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
16505 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
16506 "movi %0.8b, 24\n\t"
16507 "cmhs %0.8b, %3.8b, %0.8b\n\t"
16508 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
16509 "bsl %0.8b, %4.8b, %1.8b\n\t"
16510 : "+w"(result), "=w"(tmp1)
16511 : "Q"(temp), "w"(idx), "w"(r)
16512 : "v16", "v17", "memory");
16513 return result;
16516 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16517 vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
16519 uint8x8_t result;
16520 uint8x8_t tmp1;
16521 uint8x16x2_t temp;
16522 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
16523 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
16524 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
16525 "movi %0.8b, 24\n\t"
16526 "cmhs %0.8b, %3.8b, %0.8b\n\t"
16527 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
16528 "bsl %0.8b, %4.8b, %1.8b\n\t"
16529 : "+w"(result), "=w"(tmp1)
16530 : "Q"(temp), "w"(idx), "w"(r)
16531 : "v16", "v17", "memory");
16532 return result;
16535 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16536 vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
16538 poly8x8_t result;
16539 poly8x8_t tmp1;
16540 poly8x16x2_t temp;
16541 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
16542 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
16543 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
16544 "movi %0.8b, 24\n\t"
16545 "cmhs %0.8b, %3.8b, %0.8b\n\t"
16546 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
16547 "bsl %0.8b, %4.8b, %1.8b\n\t"
16548 : "+w"(result), "=w"(tmp1)
16549 : "Q"(temp), "w"(idx), "w"(r)
16550 : "v16", "v17", "memory");
16551 return result;
/* vtbx4: v7-style extended lookup into four 8-byte tables.  The packed
   32-byte table means AArch64 TBX's preserve-on-out-of-range behaviour
   matches v7, so a plain two-register TBX suffices.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}
/* End of temporary inline asm.  */

/* Start of optimal implementations in approved order.  */
16603 /* vabs */
16605 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16606 vabs_f32 (float32x2_t __a)
16608 return __builtin_aarch64_absv2sf (__a);
16611 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16612 vabs_f64 (float64x1_t __a)
16614 return __builtin_fabs (__a);
16617 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16618 vabs_s8 (int8x8_t __a)
16620 return __builtin_aarch64_absv8qi (__a);
16623 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16624 vabs_s16 (int16x4_t __a)
16626 return __builtin_aarch64_absv4hi (__a);
16629 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16630 vabs_s32 (int32x2_t __a)
16632 return __builtin_aarch64_absv2si (__a);
16635 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16636 vabs_s64 (int64x1_t __a)
16638 return __builtin_llabs (__a);
16641 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16642 vabsq_f32 (float32x4_t __a)
16644 return __builtin_aarch64_absv4sf (__a);
16647 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16648 vabsq_f64 (float64x2_t __a)
16650 return __builtin_aarch64_absv2df (__a);
16653 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16654 vabsq_s8 (int8x16_t __a)
16656 return __builtin_aarch64_absv16qi (__a);
16659 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16660 vabsq_s16 (int16x8_t __a)
16662 return __builtin_aarch64_absv8hi (__a);
16665 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16666 vabsq_s32 (int32x4_t __a)
16668 return __builtin_aarch64_absv4si (__a);
16671 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16672 vabsq_s64 (int64x2_t __a)
16674 return __builtin_aarch64_absv2di (__a);
16677 /* vadd */
16679 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16680 vaddd_s64 (int64x1_t __a, int64x1_t __b)
16682 return __a + __b;
16685 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16686 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
16688 return __a + __b;
16691 /* vaddv */
16693 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
16694 vaddv_s8 (int8x8_t __a)
16696 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
16699 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
16700 vaddv_s16 (int16x4_t __a)
16702 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
16705 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
16706 vaddv_s32 (int32x2_t __a)
16708 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
16711 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16712 vaddv_u8 (uint8x8_t __a)
16714 return vget_lane_u8 ((uint8x8_t)
16715 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
16718 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16719 vaddv_u16 (uint16x4_t __a)
16721 return vget_lane_u16 ((uint16x4_t)
16722 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
16725 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16726 vaddv_u32 (uint32x2_t __a)
16728 return vget_lane_u32 ((uint32x2_t)
16729 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
16732 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
16733 vaddvq_s8 (int8x16_t __a)
16735 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
16738 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
16739 vaddvq_s16 (int16x8_t __a)
16741 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
16744 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
16745 vaddvq_s32 (int32x4_t __a)
16747 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
16750 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16751 vaddvq_s64 (int64x2_t __a)
16753 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
16756 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16757 vaddvq_u8 (uint8x16_t __a)
16759 return vgetq_lane_u8 ((uint8x16_t)
16760 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
16763 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16764 vaddvq_u16 (uint16x8_t __a)
16766 return vgetq_lane_u16 ((uint16x8_t)
16767 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
16770 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16771 vaddvq_u32 (uint32x4_t __a)
16773 return vgetq_lane_u32 ((uint32x4_t)
16774 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
16777 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16778 vaddvq_u64 (uint64x2_t __a)
16780 return vgetq_lane_u64 ((uint64x2_t)
16781 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
16784 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16785 vaddv_f32 (float32x2_t __a)
16787 float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
16788 return vget_lane_f32 (t, 0);
16791 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16792 vaddvq_f32 (float32x4_t __a)
16794 float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
16795 return vgetq_lane_f32 (t, 0);
16798 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16799 vaddvq_f64 (float64x2_t __a)
16801 float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
16802 return vgetq_lane_f64 (t, 0);
16805 /* vcage */
16807 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16808 vcages_f32 (float32_t __a, float32_t __b)
16810 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
16813 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16814 vcage_f32 (float32x2_t __a, float32x2_t __b)
16816 return vabs_f32 (__a) >= vabs_f32 (__b);
16819 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16820 vcageq_f32 (float32x4_t __a, float32x4_t __b)
16822 return vabsq_f32 (__a) >= vabsq_f32 (__b);
16825 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16826 vcaged_f64 (float64_t __a, float64_t __b)
16828 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
16831 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16832 vcageq_f64 (float64x2_t __a, float64x2_t __b)
16834 return vabsq_f64 (__a) >= vabsq_f64 (__b);
16837 /* vcagt */
16839 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16840 vcagts_f32 (float32_t __a, float32_t __b)
16842 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
16845 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16846 vcagt_f32 (float32x2_t __a, float32x2_t __b)
16848 return vabs_f32 (__a) > vabs_f32 (__b);
16851 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16852 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
16854 return vabsq_f32 (__a) > vabsq_f32 (__b);
16857 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16858 vcagtd_f64 (float64_t __a, float64_t __b)
16860 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
16863 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16864 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
16866 return vabsq_f64 (__a) > vabsq_f64 (__b);
16869 /* vcale */
16871 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16872 vcale_f32 (float32x2_t __a, float32x2_t __b)
16874 return vabs_f32 (__a) <= vabs_f32 (__b);
16877 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16878 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
16880 return vabsq_f32 (__a) <= vabsq_f32 (__b);
16883 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16884 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
16886 return vabsq_f64 (__a) <= vabsq_f64 (__b);
16889 /* vcalt */
16891 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16892 vcalt_f32 (float32x2_t __a, float32x2_t __b)
16894 return vabs_f32 (__a) < vabs_f32 (__b);
16897 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16898 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
16900 return vabsq_f32 (__a) < vabsq_f32 (__b);
16903 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16904 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
16906 return vabsq_f64 (__a) < vabsq_f64 (__b);
16909 /* vceq - vector. */
16911 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16912 vceq_f32 (float32x2_t __a, float32x2_t __b)
16914 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
16917 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16918 vceq_f64 (float64x1_t __a, float64x1_t __b)
16920 return __a == __b ? -1ll : 0ll;
16923 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16924 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
16926 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
16927 (int8x8_t) __b);
16930 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16931 vceq_s8 (int8x8_t __a, int8x8_t __b)
16933 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
16936 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16937 vceq_s16 (int16x4_t __a, int16x4_t __b)
16939 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
16942 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16943 vceq_s32 (int32x2_t __a, int32x2_t __b)
16945 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
16948 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16949 vceq_s64 (int64x1_t __a, int64x1_t __b)
16951 return __a == __b ? -1ll : 0ll;
16954 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16955 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
16957 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
16958 (int8x8_t) __b);
16961 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16962 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
16964 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
16965 (int16x4_t) __b);
16968 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16969 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
16971 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
16972 (int32x2_t) __b);
16975 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16976 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
16978 return __a == __b ? -1ll : 0ll;
16981 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16982 vceqq_f32 (float32x4_t __a, float32x4_t __b)
16984 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
16987 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16988 vceqq_f64 (float64x2_t __a, float64x2_t __b)
16990 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
16993 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16994 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
16996 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16997 (int8x16_t) __b);
17000 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17001 vceqq_s8 (int8x16_t __a, int8x16_t __b)
17003 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
17006 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17007 vceqq_s16 (int16x8_t __a, int16x8_t __b)
17009 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
17012 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17013 vceqq_s32 (int32x4_t __a, int32x4_t __b)
17015 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
17018 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17019 vceqq_s64 (int64x2_t __a, int64x2_t __b)
17021 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
17024 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17025 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
17027 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
17028 (int8x16_t) __b);
17031 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17032 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
17034 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
17035 (int16x8_t) __b);
17038 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17039 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
17041 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
17042 (int32x4_t) __b);
17045 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17046 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
17048 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
17049 (int64x2_t) __b);
17052 /* vceq - scalar. */
17054 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17055 vceqs_f32 (float32_t __a, float32_t __b)
17057 return __a == __b ? -1 : 0;
17060 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17061 vceqd_s64 (int64x1_t __a, int64x1_t __b)
17063 return __a == __b ? -1ll : 0ll;
17066 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17067 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
17069 return __a == __b ? -1ll : 0ll;
17072 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17073 vceqd_f64 (float64_t __a, float64_t __b)
17075 return __a == __b ? -1ll : 0ll;
17078 /* vceqz - vector. */
17080 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17081 vceqz_f32 (float32x2_t __a)
17083 float32x2_t __b = {0.0f, 0.0f};
17084 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
17087 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17088 vceqz_f64 (float64x1_t __a)
17090 return __a == 0.0 ? -1ll : 0ll;
17093 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17094 vceqz_p8 (poly8x8_t __a)
17096 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17097 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
17098 (int8x8_t) __b);
17101 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17102 vceqz_s8 (int8x8_t __a)
17104 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17105 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
17108 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17109 vceqz_s16 (int16x4_t __a)
17111 int16x4_t __b = {0, 0, 0, 0};
17112 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
17115 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17116 vceqz_s32 (int32x2_t __a)
17118 int32x2_t __b = {0, 0};
17119 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
17122 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17123 vceqz_s64 (int64x1_t __a)
17125 return __a == 0ll ? -1ll : 0ll;
17128 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17129 vceqz_u8 (uint8x8_t __a)
17131 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17132 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
17133 (int8x8_t) __b);
17136 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17137 vceqz_u16 (uint16x4_t __a)
17139 uint16x4_t __b = {0, 0, 0, 0};
17140 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
17141 (int16x4_t) __b);
17144 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17145 vceqz_u32 (uint32x2_t __a)
17147 uint32x2_t __b = {0, 0};
17148 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
17149 (int32x2_t) __b);
17152 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17153 vceqz_u64 (uint64x1_t __a)
17155 return __a == 0ll ? -1ll : 0ll;
17158 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17159 vceqzq_f32 (float32x4_t __a)
17161 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17162 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
17165 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17166 vceqzq_f64 (float64x2_t __a)
17168 float64x2_t __b = {0.0, 0.0};
17169 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
17172 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17173 vceqzq_p8 (poly8x16_t __a)
17175 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17176 0, 0, 0, 0, 0, 0, 0, 0};
17177 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
17178 (int8x16_t) __b);
17181 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17182 vceqzq_s8 (int8x16_t __a)
17184 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17185 0, 0, 0, 0, 0, 0, 0, 0};
17186 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
17189 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17190 vceqzq_s16 (int16x8_t __a)
17192 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17193 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
17196 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17197 vceqzq_s32 (int32x4_t __a)
17199 int32x4_t __b = {0, 0, 0, 0};
17200 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
17203 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17204 vceqzq_s64 (int64x2_t __a)
17206 int64x2_t __b = {0, 0};
17207 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
17210 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17211 vceqzq_u8 (uint8x16_t __a)
17213 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17214 0, 0, 0, 0, 0, 0, 0, 0};
17215 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
17216 (int8x16_t) __b);
17219 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17220 vceqzq_u16 (uint16x8_t __a)
17222 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17223 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
17224 (int16x8_t) __b);
17227 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17228 vceqzq_u32 (uint32x4_t __a)
17230 uint32x4_t __b = {0, 0, 0, 0};
17231 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
17232 (int32x4_t) __b);
17235 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17236 vceqzq_u64 (uint64x2_t __a)
17238 uint64x2_t __b = {0, 0};
17239 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
17240 (int64x2_t) __b);
17243 /* vceqz - scalar. */
17245 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17246 vceqzs_f32 (float32_t __a)
17248 return __a == 0.0f ? -1 : 0;
17251 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17252 vceqzd_s64 (int64x1_t __a)
17254 return __a == 0 ? -1ll : 0ll;
17257 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17258 vceqzd_u64 (int64x1_t __a)
17260 return __a == 0 ? -1ll : 0ll;
17263 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17264 vceqzd_f64 (float64_t __a)
17266 return __a == 0.0 ? -1ll : 0ll;
17269 /* vcge - vector. */
17271 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17272 vcge_f32 (float32x2_t __a, float32x2_t __b)
17274 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
17277 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17278 vcge_f64 (float64x1_t __a, float64x1_t __b)
17280 return __a >= __b ? -1ll : 0ll;
17283 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17284 vcge_p8 (poly8x8_t __a, poly8x8_t __b)
17286 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
17287 (int8x8_t) __b);
17290 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17291 vcge_s8 (int8x8_t __a, int8x8_t __b)
17293 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
17296 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17297 vcge_s16 (int16x4_t __a, int16x4_t __b)
17299 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
17302 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17303 vcge_s32 (int32x2_t __a, int32x2_t __b)
17305 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
17308 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17309 vcge_s64 (int64x1_t __a, int64x1_t __b)
17311 return __a >= __b ? -1ll : 0ll;
17314 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17315 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
17317 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
17318 (int8x8_t) __b);
17321 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17322 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
17324 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
17325 (int16x4_t) __b);
17328 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17329 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
17331 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
17332 (int32x2_t) __b);
17335 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17336 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
17338 return __a >= __b ? -1ll : 0ll;
17341 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17342 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
17344 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
17347 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17348 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
17350 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
17353 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17354 vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
17356 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
17357 (int8x16_t) __b);
17360 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17361 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
17363 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
17366 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17367 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
17369 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
17372 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17373 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
17375 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
17378 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17379 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
17381 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
17384 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17385 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
17387 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
17388 (int8x16_t) __b);
17391 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17392 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
17394 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
17395 (int16x8_t) __b);
17398 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17399 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
17401 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
17402 (int32x4_t) __b);
17405 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17406 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
17408 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
17409 (int64x2_t) __b);
17412 /* vcge - scalar. */
17414 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17415 vcges_f32 (float32_t __a, float32_t __b)
17417 return __a >= __b ? -1 : 0;
17420 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17421 vcged_s64 (int64x1_t __a, int64x1_t __b)
17423 return __a >= __b ? -1ll : 0ll;
17426 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17427 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
17429 return __a >= __b ? -1ll : 0ll;
17432 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17433 vcged_f64 (float64_t __a, float64_t __b)
17435 return __a >= __b ? -1ll : 0ll;
17438 /* vcgez - vector. */
17440 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17441 vcgez_f32 (float32x2_t __a)
17443 float32x2_t __b = {0.0f, 0.0f};
17444 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
17447 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17448 vcgez_f64 (float64x1_t __a)
17450 return __a >= 0.0 ? -1ll : 0ll;
17453 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17454 vcgez_p8 (poly8x8_t __a)
17456 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17457 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
17458 (int8x8_t) __b);
17461 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17462 vcgez_s8 (int8x8_t __a)
17464 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17465 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
17468 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17469 vcgez_s16 (int16x4_t __a)
17471 int16x4_t __b = {0, 0, 0, 0};
17472 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
17475 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17476 vcgez_s32 (int32x2_t __a)
17478 int32x2_t __b = {0, 0};
17479 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
17482 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17483 vcgez_s64 (int64x1_t __a)
17485 return __a >= 0ll ? -1ll : 0ll;
17488 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17489 vcgez_u8 (uint8x8_t __a)
17491 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17492 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
17493 (int8x8_t) __b);
17496 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17497 vcgez_u16 (uint16x4_t __a)
17499 uint16x4_t __b = {0, 0, 0, 0};
17500 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
17501 (int16x4_t) __b);
17504 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17505 vcgez_u32 (uint32x2_t __a)
17507 uint32x2_t __b = {0, 0};
17508 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
17509 (int32x2_t) __b);
17512 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17513 vcgez_u64 (uint64x1_t __a)
17515 return __a >= 0ll ? -1ll : 0ll;
17518 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17519 vcgezq_f32 (float32x4_t __a)
17521 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17522 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
17525 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17526 vcgezq_f64 (float64x2_t __a)
17528 float64x2_t __b = {0.0, 0.0};
17529 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
17532 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17533 vcgezq_p8 (poly8x16_t __a)
17535 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17536 0, 0, 0, 0, 0, 0, 0, 0};
17537 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
17538 (int8x16_t) __b);
17541 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17542 vcgezq_s8 (int8x16_t __a)
17544 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17545 0, 0, 0, 0, 0, 0, 0, 0};
17546 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
17549 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17550 vcgezq_s16 (int16x8_t __a)
17552 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17553 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
17556 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17557 vcgezq_s32 (int32x4_t __a)
17559 int32x4_t __b = {0, 0, 0, 0};
17560 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
17563 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17564 vcgezq_s64 (int64x2_t __a)
17566 int64x2_t __b = {0, 0};
17567 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
17570 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17571 vcgezq_u8 (uint8x16_t __a)
17573 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17574 0, 0, 0, 0, 0, 0, 0, 0};
17575 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
17576 (int8x16_t) __b);
17579 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17580 vcgezq_u16 (uint16x8_t __a)
17582 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17583 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
17584 (int16x8_t) __b);
17587 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17588 vcgezq_u32 (uint32x4_t __a)
17590 uint32x4_t __b = {0, 0, 0, 0};
17591 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
17592 (int32x4_t) __b);
17595 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17596 vcgezq_u64 (uint64x2_t __a)
17598 uint64x2_t __b = {0, 0};
17599 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
17600 (int64x2_t) __b);
17603 /* vcgez - scalar. */
17605 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17606 vcgezs_f32 (float32_t __a)
17608 return __a >= 0.0f ? -1 : 0;
17611 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17612 vcgezd_s64 (int64x1_t __a)
17614 return __a >= 0 ? -1ll : 0ll;
17617 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17618 vcgezd_u64 (int64x1_t __a)
17620 return __a >= 0 ? -1ll : 0ll;
17623 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17624 vcgezd_f64 (float64_t __a)
17626 return __a >= 0.0 ? -1ll : 0ll;
17629 /* vcgt - vector. */
17631 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17632 vcgt_f32 (float32x2_t __a, float32x2_t __b)
17634 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
17637 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17638 vcgt_f64 (float64x1_t __a, float64x1_t __b)
17640 return __a > __b ? -1ll : 0ll;
17643 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17644 vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
17646 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
17647 (int8x8_t) __b);
17650 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17651 vcgt_s8 (int8x8_t __a, int8x8_t __b)
17653 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
17656 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17657 vcgt_s16 (int16x4_t __a, int16x4_t __b)
17659 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
17662 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17663 vcgt_s32 (int32x2_t __a, int32x2_t __b)
17665 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
17668 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17669 vcgt_s64 (int64x1_t __a, int64x1_t __b)
17671 return __a > __b ? -1ll : 0ll;
17674 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17675 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
17677 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
17678 (int8x8_t) __b);
17681 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17682 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
17684 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
17685 (int16x4_t) __b);
17688 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17689 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
17691 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
17692 (int32x2_t) __b);
17695 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17696 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
17698 return __a > __b ? -1ll : 0ll;
17701 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17702 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
17704 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
17707 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17708 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
17710 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
17713 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17714 vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
17716 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
17717 (int8x16_t) __b);
17720 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17721 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
17723 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
17726 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17727 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
17729 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
17732 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17733 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
17735 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
17738 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17739 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
17741 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
17744 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17745 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
17747 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
17748 (int8x16_t) __b);
17751 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17752 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
17754 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
17755 (int16x8_t) __b);
17758 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17759 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
17761 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
17762 (int32x4_t) __b);
17765 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17766 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
17768 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
17769 (int64x2_t) __b);
17772 /* vcgt - scalar. */
17774 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17775 vcgts_f32 (float32_t __a, float32_t __b)
17777 return __a > __b ? -1 : 0;
17780 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17781 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
17783 return __a > __b ? -1ll : 0ll;
17786 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17787 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
17789 return __a > __b ? -1ll : 0ll;
17792 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17793 vcgtd_f64 (float64_t __a, float64_t __b)
17795 return __a > __b ? -1ll : 0ll;
17798 /* vcgtz - vector. */
17800 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17801 vcgtz_f32 (float32x2_t __a)
17803 float32x2_t __b = {0.0f, 0.0f};
17804 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
17807 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17808 vcgtz_f64 (float64x1_t __a)
17810 return __a > 0.0 ? -1ll : 0ll;
17813 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17814 vcgtz_p8 (poly8x8_t __a)
17816 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17817 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
17818 (int8x8_t) __b);
17821 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17822 vcgtz_s8 (int8x8_t __a)
17824 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17825 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
17828 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17829 vcgtz_s16 (int16x4_t __a)
17831 int16x4_t __b = {0, 0, 0, 0};
17832 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
17835 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17836 vcgtz_s32 (int32x2_t __a)
17838 int32x2_t __b = {0, 0};
17839 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
17842 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17843 vcgtz_s64 (int64x1_t __a)
17845 return __a > 0ll ? -1ll : 0ll;
17848 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17849 vcgtz_u8 (uint8x8_t __a)
17851 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17852 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
17853 (int8x8_t) __b);
17856 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17857 vcgtz_u16 (uint16x4_t __a)
17859 uint16x4_t __b = {0, 0, 0, 0};
17860 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
17861 (int16x4_t) __b);
17864 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17865 vcgtz_u32 (uint32x2_t __a)
17867 uint32x2_t __b = {0, 0};
17868 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
17869 (int32x2_t) __b);
17872 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17873 vcgtz_u64 (uint64x1_t __a)
17875 return __a > 0ll ? -1ll : 0ll;
17878 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17879 vcgtzq_f32 (float32x4_t __a)
17881 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17882 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
17885 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17886 vcgtzq_f64 (float64x2_t __a)
17888 float64x2_t __b = {0.0, 0.0};
17889 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
17892 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17893 vcgtzq_p8 (poly8x16_t __a)
17895 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17896 0, 0, 0, 0, 0, 0, 0, 0};
17897 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
17898 (int8x16_t) __b);
17901 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17902 vcgtzq_s8 (int8x16_t __a)
17904 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17905 0, 0, 0, 0, 0, 0, 0, 0};
17906 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
17909 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17910 vcgtzq_s16 (int16x8_t __a)
17912 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17913 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
17916 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17917 vcgtzq_s32 (int32x4_t __a)
17919 int32x4_t __b = {0, 0, 0, 0};
17920 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
17923 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17924 vcgtzq_s64 (int64x2_t __a)
17926 int64x2_t __b = {0, 0};
17927 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
17930 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17931 vcgtzq_u8 (uint8x16_t __a)
17933 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17934 0, 0, 0, 0, 0, 0, 0, 0};
17935 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
17936 (int8x16_t) __b);
17939 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17940 vcgtzq_u16 (uint16x8_t __a)
17942 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17943 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
17944 (int16x8_t) __b);
17947 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17948 vcgtzq_u32 (uint32x4_t __a)
17950 uint32x4_t __b = {0, 0, 0, 0};
17951 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
17952 (int32x4_t) __b);
17955 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17956 vcgtzq_u64 (uint64x2_t __a)
17958 uint64x2_t __b = {0, 0};
17959 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
17960 (int64x2_t) __b);
17963 /* vcgtz - scalar. */
17965 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17966 vcgtzs_f32 (float32_t __a)
17968 return __a > 0.0f ? -1 : 0;
17971 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17972 vcgtzd_s64 (int64x1_t __a)
17974 return __a > 0 ? -1ll : 0ll;
17977 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17978 vcgtzd_u64 (int64x1_t __a)
17980 return __a > 0 ? -1ll : 0ll;
17983 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17984 vcgtzd_f64 (float64_t __a)
17986 return __a > 0.0 ? -1ll : 0ll;
17989 /* vcle - vector. */
17991 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17992 vcle_f32 (float32x2_t __a, float32x2_t __b)
17994 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
17997 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17998 vcle_f64 (float64x1_t __a, float64x1_t __b)
18000 return __a <= __b ? -1ll : 0ll;
18003 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18004 vcle_p8 (poly8x8_t __a, poly8x8_t __b)
18006 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
18007 (int8x8_t) __a);
18010 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18011 vcle_s8 (int8x8_t __a, int8x8_t __b)
18013 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
18016 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18017 vcle_s16 (int16x4_t __a, int16x4_t __b)
18019 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
18022 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18023 vcle_s32 (int32x2_t __a, int32x2_t __b)
18025 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
18028 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18029 vcle_s64 (int64x1_t __a, int64x1_t __b)
18031 return __a <= __b ? -1ll : 0ll;
18034 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18035 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
18037 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
18038 (int8x8_t) __a);
18041 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18042 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
18044 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
18045 (int16x4_t) __a);
18048 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18049 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
18051 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
18052 (int32x2_t) __a);
18055 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18056 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
18058 return __a <= __b ? -1ll : 0ll;
18061 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18062 vcleq_f32 (float32x4_t __a, float32x4_t __b)
18064 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
18067 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18068 vcleq_f64 (float64x2_t __a, float64x2_t __b)
18070 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
18073 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18074 vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
18076 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
18077 (int8x16_t) __a);
18080 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18081 vcleq_s8 (int8x16_t __a, int8x16_t __b)
18083 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
18086 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18087 vcleq_s16 (int16x8_t __a, int16x8_t __b)
18089 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
18092 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18093 vcleq_s32 (int32x4_t __a, int32x4_t __b)
18095 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
18098 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18099 vcleq_s64 (int64x2_t __a, int64x2_t __b)
18101 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
18104 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18105 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
18107 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
18108 (int8x16_t) __a);
18111 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18112 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
18114 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
18115 (int16x8_t) __a);
18118 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18119 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
18121 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
18122 (int32x4_t) __a);
18125 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18126 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
18128 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
18129 (int64x2_t) __a);
18132 /* vcle - scalar. */
18134 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18135 vcles_f32 (float32_t __a, float32_t __b)
18137 return __a <= __b ? -1 : 0;
18140 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18141 vcled_s64 (int64x1_t __a, int64x1_t __b)
18143 return __a <= __b ? -1ll : 0ll;
18146 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18147 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
18149 return __a <= __b ? -1ll : 0ll;
18152 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18153 vcled_f64 (float64_t __a, float64_t __b)
18155 return __a <= __b ? -1ll : 0ll;
18158 /* vclez - vector. */
18160 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18161 vclez_f32 (float32x2_t __a)
18163 float32x2_t __b = {0.0f, 0.0f};
18164 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
18167 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18168 vclez_f64 (float64x1_t __a)
18170 return __a <= 0.0 ? -1ll : 0ll;
18173 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18174 vclez_p8 (poly8x8_t __a)
18176 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18177 return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
18178 (int8x8_t) __b);
18181 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18182 vclez_s8 (int8x8_t __a)
18184 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18185 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
18188 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18189 vclez_s16 (int16x4_t __a)
18191 int16x4_t __b = {0, 0, 0, 0};
18192 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
18195 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18196 vclez_s32 (int32x2_t __a)
18198 int32x2_t __b = {0, 0};
18199 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
18202 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18203 vclez_s64 (int64x1_t __a)
18205 return __a <= 0ll ? -1ll : 0ll;
18208 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18209 vclez_u64 (uint64x1_t __a)
18211 return __a <= 0ll ? -1ll : 0ll;
18214 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18215 vclezq_f32 (float32x4_t __a)
18217 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
18218 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
18221 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18222 vclezq_f64 (float64x2_t __a)
18224 float64x2_t __b = {0.0, 0.0};
18225 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
18228 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18229 vclezq_p8 (poly8x16_t __a)
18231 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18232 0, 0, 0, 0, 0, 0, 0, 0};
18233 return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
18234 (int8x16_t) __b);
18237 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18238 vclezq_s8 (int8x16_t __a)
18240 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18241 0, 0, 0, 0, 0, 0, 0, 0};
18242 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
18245 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18246 vclezq_s16 (int16x8_t __a)
18248 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18249 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
18252 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18253 vclezq_s32 (int32x4_t __a)
18255 int32x4_t __b = {0, 0, 0, 0};
18256 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
18259 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18260 vclezq_s64 (int64x2_t __a)
18262 int64x2_t __b = {0, 0};
18263 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
18266 /* vclez - scalar. */
18268 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18269 vclezs_f32 (float32_t __a)
18271 return __a <= 0.0f ? -1 : 0;
18274 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18275 vclezd_s64 (int64x1_t __a)
18277 return __a <= 0 ? -1ll : 0ll;
18280 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18281 vclezd_u64 (int64x1_t __a)
18283 return __a <= 0 ? -1ll : 0ll;
18286 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18287 vclezd_f64 (float64_t __a)
18289 return __a <= 0.0 ? -1ll : 0ll;
18292 /* vclt - vector. */
18294 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18295 vclt_f32 (float32x2_t __a, float32x2_t __b)
18297 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
18300 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18301 vclt_f64 (float64x1_t __a, float64x1_t __b)
18303 return __a < __b ? -1ll : 0ll;
18306 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18307 vclt_p8 (poly8x8_t __a, poly8x8_t __b)
18309 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
18310 (int8x8_t) __a);
18313 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18314 vclt_s8 (int8x8_t __a, int8x8_t __b)
18316 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
18319 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18320 vclt_s16 (int16x4_t __a, int16x4_t __b)
18322 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
18325 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18326 vclt_s32 (int32x2_t __a, int32x2_t __b)
18328 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
18331 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18332 vclt_s64 (int64x1_t __a, int64x1_t __b)
18334 return __a < __b ? -1ll : 0ll;
18337 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18338 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
18340 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
18341 (int8x8_t) __a);
18344 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18345 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
18347 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
18348 (int16x4_t) __a);
18351 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18352 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
18354 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
18355 (int32x2_t) __a);
18358 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18359 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
18361 return __a < __b ? -1ll : 0ll;
18364 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18365 vcltq_f32 (float32x4_t __a, float32x4_t __b)
18367 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
18370 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18371 vcltq_f64 (float64x2_t __a, float64x2_t __b)
18373 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
18376 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18377 vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
18379 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
18380 (int8x16_t) __a);
18383 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18384 vcltq_s8 (int8x16_t __a, int8x16_t __b)
18386 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
18389 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18390 vcltq_s16 (int16x8_t __a, int16x8_t __b)
18392 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
18395 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18396 vcltq_s32 (int32x4_t __a, int32x4_t __b)
18398 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
18401 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18402 vcltq_s64 (int64x2_t __a, int64x2_t __b)
18404 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
18407 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18408 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
18410 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
18411 (int8x16_t) __a);
18414 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18415 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
18417 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
18418 (int16x8_t) __a);
18421 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18422 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
18424 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
18425 (int32x4_t) __a);
18428 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18429 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
18431 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
18432 (int64x2_t) __a);
18435 /* vclt - scalar. */
18437 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18438 vclts_f32 (float32_t __a, float32_t __b)
18440 return __a < __b ? -1 : 0;
18443 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18444 vcltd_s64 (int64x1_t __a, int64x1_t __b)
18446 return __a < __b ? -1ll : 0ll;
18449 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18450 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
18452 return __a < __b ? -1ll : 0ll;
18455 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18456 vcltd_f64 (float64_t __a, float64_t __b)
18458 return __a < __b ? -1ll : 0ll;
18461 /* vcltz - vector. */
18463 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18464 vcltz_f32 (float32x2_t __a)
18466 float32x2_t __b = {0.0f, 0.0f};
18467 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
18470 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18471 vcltz_f64 (float64x1_t __a)
18473 return __a < 0.0 ? -1ll : 0ll;
18476 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18477 vcltz_p8 (poly8x8_t __a)
18479 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18480 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
18481 (int8x8_t) __b);
18484 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18485 vcltz_s8 (int8x8_t __a)
18487 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18488 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
18491 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18492 vcltz_s16 (int16x4_t __a)
18494 int16x4_t __b = {0, 0, 0, 0};
18495 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
18498 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18499 vcltz_s32 (int32x2_t __a)
18501 int32x2_t __b = {0, 0};
18502 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
18505 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18506 vcltz_s64 (int64x1_t __a)
18508 return __a < 0ll ? -1ll : 0ll;
18511 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18512 vcltzq_f32 (float32x4_t __a)
18514 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
18515 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
18518 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18519 vcltzq_f64 (float64x2_t __a)
18521 float64x2_t __b = {0.0, 0.0};
18522 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
18525 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18526 vcltzq_p8 (poly8x16_t __a)
18528 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18529 0, 0, 0, 0, 0, 0, 0, 0};
18530 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
18531 (int8x16_t) __b);
18534 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18535 vcltzq_s8 (int8x16_t __a)
18537 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18538 0, 0, 0, 0, 0, 0, 0, 0};
18539 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
18542 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18543 vcltzq_s16 (int16x8_t __a)
18545 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18546 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
18549 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18550 vcltzq_s32 (int32x4_t __a)
18552 int32x4_t __b = {0, 0, 0, 0};
18553 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
18556 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18557 vcltzq_s64 (int64x2_t __a)
18559 int64x2_t __b = {0, 0};
18560 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
18563 /* vcltz - scalar. */
18565 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18566 vcltzs_f32 (float32_t __a)
18568 return __a < 0.0f ? -1 : 0;
18571 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18572 vcltzd_s64 (int64x1_t __a)
18574 return __a < 0 ? -1ll : 0ll;
18577 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18578 vcltzd_u64 (int64x1_t __a)
18580 return __a < 0 ? -1ll : 0ll;
18583 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18584 vcltzd_f64 (float64_t __a)
18586 return __a < 0.0 ? -1ll : 0ll;
18589 /* vcvt (double -> float). */
18591 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18592 vcvt_f32_f64 (float64x2_t __a)
18594 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
18597 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18598 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
18600 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
18603 /* vcvt (float -> double). */
18605 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18606 vcvt_f64_f32 (float32x2_t __a)
18609 return __builtin_aarch64_float_extend_lo_v2df (__a);
18612 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18613 vcvt_high_f64_f32 (float32x4_t __a)
18615 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
18618 /* vcvt (<u>int -> float) */
18620 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18621 vcvtd_f64_s64 (int64_t __a)
18623 return (float64_t) __a;
18626 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18627 vcvtd_f64_u64 (uint64_t __a)
18629 return (float64_t) __a;
18632 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18633 vcvts_f32_s32 (int32_t __a)
18635 return (float32_t) __a;
18638 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18639 vcvts_f32_u32 (uint32_t __a)
18641 return (float32_t) __a;
18644 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18645 vcvt_f32_s32 (int32x2_t __a)
18647 return __builtin_aarch64_floatv2siv2sf (__a);
18650 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18651 vcvt_f32_u32 (uint32x2_t __a)
18653 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
18656 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18657 vcvtq_f32_s32 (int32x4_t __a)
18659 return __builtin_aarch64_floatv4siv4sf (__a);
18662 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18663 vcvtq_f32_u32 (uint32x4_t __a)
18665 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
18668 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18669 vcvtq_f64_s64 (int64x2_t __a)
18671 return __builtin_aarch64_floatv2div2df (__a);
18674 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18675 vcvtq_f64_u64 (uint64x2_t __a)
18677 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
/* vcvt (float -> <u>int): conversions that truncate toward zero, in
   scalar (d/s-suffixed), 64-bit vector, and 128-bit vector (q) forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtd_s64_f64 (float64_t __a)
{
  /* A plain C cast already truncates toward zero.  */
  return (int64_t) __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtd_u64_f64 (float64_t __a)
{
  return (uint64_t) __a;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_s32_f32 (float32_t __a)
{
  return (int32_t) __a;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_u32_f32 (float32_t __a)
{
  return (uint32_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
}
/* vcvta: float -> integer conversions using the "round to nearest, ties
   away from zero" mode (the lround* builtins).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lrounddfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lroundudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtas_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtas_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lroundv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lroundv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lroundv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
}
/* vcvtm: float -> integer conversions rounding toward minus infinity
   (the floor builtins).  The signed scalar forms use the generic
   __builtin_lfloor/__builtin_ifloorf; the rest use AArch64 builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64 (float64_t __a)
{
  return __builtin_lfloor (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfloorudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtms_s32_f32 (float32_t __a)
{
  return __builtin_ifloorf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtms_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfloorusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
}
/* vcvtn: float -> integer conversions using the lfrintn* builtins
   ("frintn" = round to nearest with ties to even — NOTE(review):
   inferred from the builtin name; confirm against the builtin
   definitions in aarch64-builtins.c.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
}
/* vcvtp: float -> integer conversions rounding toward plus infinity
   (the ceil builtins).  The signed scalar forms use the generic
   __builtin_lceil/__builtin_iceilf; the rest use AArch64 builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64 (float64_t __a)
{
  return __builtin_lceil (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lceiludfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtps_s32_f32 (float32_t __a)
{
  return __builtin_iceilf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtps_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lceilusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceilv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceilv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceilv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
}
/* vdup_n: broadcast a scalar into every lane of a 64-bit vector,
   implemented with plain vector-constructor initializers so the
   compiler is free to select DUP or an equivalent.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
  return (float32x2_t) {__a, __a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_n_f64 (float64_t __a)
{
  /* float64x1_t is a plain scalar typedef, so no constructor needed.  */
  return __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (poly8_t __a)
{
  return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (poly16_t __a)
{
  return (poly16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int8_t __a)
{
  return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int16_t __a)
{
  return (int16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t __a)
{
  return (int32x2_t) {__a, __a};
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t __a)
{
  /* int64x1_t is a plain scalar typedef.  */
  return __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint8_t __a)
{
  return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint16_t __a)
{
  return (uint16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t __a)
{
  return (uint32x2_t) {__a, __a};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t __a)
{
  /* uint64x1_t is a plain scalar typedef.  */
  return __a;
}
/* vdupq_n: broadcast a scalar into every lane of a 128-bit vector.
   NOTE(review): the 8- and 16-bit element variants take uint32_t/int32_t
   scalars rather than the element type; the element-wise initialization
   truncates the excess bits.  This predates the element-typed ACLE
   signatures — confirm against ACLE before changing.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}
/* vdup_lane: broadcast lane __b of a 64-bit vector into every lane of a
   64-bit result.  Each wrapper forwards to the corresponding
   __aarch64_vdup_lane_* helper macro defined earlier in this header.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_u64 (__a, __b);
}
/* vdup_laneq: broadcast lane __b of a 128-bit vector into every lane of
   a 64-bit result, via the __aarch64_vdup_laneq_* helper macros.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u64 (__a, __b);
}
/* vdupq_lane: broadcast lane __b of a 64-bit vector into every lane of
   a 128-bit result, via the __aarch64_vdupq_lane_* helper macros.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u64 (__a, __b);
}
/* vdupq_laneq: broadcast lane __b of a 128-bit vector into every lane
   of a 128-bit result, via the __aarch64_vdupq_laneq_* helper macros.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u64 (__a, __b);
}
/* vdupb_lane: extract byte lane __b of a 64-bit vector as a scalar
   (implemented as a plain lane read via __aarch64_vget_lane_*).  */
__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_p8 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vdupb_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_s8 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vdupb_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_u8 (__a, __b);
}
/* vduph_lane: extract halfword lane __b of a 64-bit vector as a scalar.  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_p16 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_s16 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_u16 (__a, __b);
}
/* vdups_lane: extract word lane __b of a 64-bit vector as a scalar.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_f32 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_s32 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_u32 (__a, __b);
}
/* vdupd_lane: extract doubleword lane __b of a 64-bit vector.  The
   64x1 types are plain scalars with a single lane, so __b is genuinely
   unused and the input is returned as-is.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}
19541 /* vdupb_laneq */
19542 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
19543 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
19545 return __aarch64_vgetq_lane_p8 (__a, __b);
19548 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19549 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
19551 return __aarch64_vgetq_lane_s8 (__a, __b);
19554 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19555 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
19557 return __aarch64_vgetq_lane_u8 (__a, __b);
/* vduph_laneq: extract halfword lane __b of a 128-bit vector as a scalar.  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p16 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s16 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u16 (__a, __b);
}
/* vdups_laneq: extract word lane __b of a 128-bit vector as a scalar.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f32 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s32 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u32 (__a, __b);
}
/* vdupd_laneq: extract doubleword lane __b of a 128-bit vector as a
   scalar.  Unlike vdupd_lane, a q-register has two 64-bit lanes, so the
   lane index is meaningful here.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f64 (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s64 (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u64 (__a, __b);
}
/* vld1: load a 64-bit vector from memory.  Pointer arguments are cast
   to the __builtin_aarch64_simd_* element types the builtins expect;
   poly/unsigned results are cast from the signed builtin return type.
   The 64x1 variants are plain scalar loads.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64 (const float64_t *a)
{
  return *a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8 (const poly8_t *a)
{
  return (poly8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t *a)
{
  return (poly16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t *a)
{
  return *a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t *a)
{
  return (uint8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t *a)
{
  return (uint16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t *a)
{
  return (uint32x2_t)
    __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t *a)
{
  return *a;
}
/* vld1q: load a 128-bit vector from memory; same cast conventions as
   the vld1 group above.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t *a)
{
  return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t *a)
{
  return (poly8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16 (const poly16_t *a)
{
  return (poly16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t *a)
{
  return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t *a)
{
  return (uint8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t *a)
{
  return (uint16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t *a)
{
  return (uint32x4_t)
    __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t *a)
{
  return (uint64x2_t)
    __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}
/* vldn: multi-vector structure loads.  Each vld2 variant loads two
   vectors with one builtin that returns an opaque register pair
   (__builtin_aarch64_simd_oi) and then extracts the halves with the
   matching __builtin_aarch64_get_dregoi* accessor into the .val array
   of the x2 result struct.  */

__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
  return ret;
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_p16 (const poly16_t * __a)
{
  poly16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}
19855 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
19856 vld2_s32 (const int32_t * __a)
19858 int32x2x2_t ret;
19859 __builtin_aarch64_simd_oi __o;
19860 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
19861 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
19862 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
19863 return ret;
19866 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
19867 vld2_u8 (const uint8_t * __a)
19869 uint8x8x2_t ret;
19870 __builtin_aarch64_simd_oi __o;
19871 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
19872 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
19873 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
19874 return ret;
19877 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
19878 vld2_u16 (const uint16_t * __a)
19880 uint16x4x2_t ret;
19881 __builtin_aarch64_simd_oi __o;
19882 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
19883 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
19884 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
19885 return ret;
19888 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
19889 vld2_u32 (const uint32_t * __a)
19891 uint32x2x2_t ret;
19892 __builtin_aarch64_simd_oi __o;
19893 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
19894 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
19895 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
19896 return ret;
19899 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
19900 vld2_f32 (const float32_t * __a)
19902 float32x2x2_t ret;
19903 __builtin_aarch64_simd_oi __o;
19904 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
19905 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
19906 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
19907 return ret;
19910 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
19911 vld2q_s8 (const int8_t * __a)
19913 int8x16x2_t ret;
19914 __builtin_aarch64_simd_oi __o;
19915 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19916 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19917 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19918 return ret;
19921 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
19922 vld2q_p8 (const poly8_t * __a)
19924 poly8x16x2_t ret;
19925 __builtin_aarch64_simd_oi __o;
19926 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19927 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19928 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19929 return ret;
19932 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
19933 vld2q_s16 (const int16_t * __a)
19935 int16x8x2_t ret;
19936 __builtin_aarch64_simd_oi __o;
19937 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19938 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19939 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19940 return ret;
19943 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
19944 vld2q_p16 (const poly16_t * __a)
19946 poly16x8x2_t ret;
19947 __builtin_aarch64_simd_oi __o;
19948 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19949 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19950 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19951 return ret;
19954 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
19955 vld2q_s32 (const int32_t * __a)
19957 int32x4x2_t ret;
19958 __builtin_aarch64_simd_oi __o;
19959 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
19960 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
19961 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
19962 return ret;
19965 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
19966 vld2q_s64 (const int64_t * __a)
19968 int64x2x2_t ret;
19969 __builtin_aarch64_simd_oi __o;
19970 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
19971 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
19972 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
19973 return ret;
19976 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
19977 vld2q_u8 (const uint8_t * __a)
19979 uint8x16x2_t ret;
19980 __builtin_aarch64_simd_oi __o;
19981 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19982 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19983 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19984 return ret;
19987 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
19988 vld2q_u16 (const uint16_t * __a)
19990 uint16x8x2_t ret;
19991 __builtin_aarch64_simd_oi __o;
19992 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19993 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19994 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19995 return ret;
19998 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
19999 vld2q_u32 (const uint32_t * __a)
20001 uint32x4x2_t ret;
20002 __builtin_aarch64_simd_oi __o;
20003 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
20004 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
20005 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
20006 return ret;
20009 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
20010 vld2q_u64 (const uint64_t * __a)
20012 uint64x2x2_t ret;
20013 __builtin_aarch64_simd_oi __o;
20014 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
20015 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
20016 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
20017 return ret;
20020 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
20021 vld2q_f32 (const float32_t * __a)
20023 float32x4x2_t ret;
20024 __builtin_aarch64_simd_oi __o;
20025 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
20026 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
20027 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
20028 return ret;
20031 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
20032 vld2q_f64 (const float64_t * __a)
20034 float64x2x2_t ret;
20035 __builtin_aarch64_simd_oi __o;
20036 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
20037 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
20038 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
20039 return ret;
20042 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
20043 vld3_s64 (const int64_t * __a)
20045 int64x1x3_t ret;
20046 __builtin_aarch64_simd_ci __o;
20047 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20048 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20049 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20050 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20051 return ret;
20054 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
20055 vld3_u64 (const uint64_t * __a)
20057 uint64x1x3_t ret;
20058 __builtin_aarch64_simd_ci __o;
20059 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20060 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20061 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20062 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20063 return ret;
20066 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
20067 vld3_f64 (const float64_t * __a)
20069 float64x1x3_t ret;
20070 __builtin_aarch64_simd_ci __o;
20071 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
20072 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
20073 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
20074 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
20075 return ret;
20078 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
20079 vld3_s8 (const int8_t * __a)
20081 int8x8x3_t ret;
20082 __builtin_aarch64_simd_ci __o;
20083 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20084 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20085 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20086 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20087 return ret;
20090 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
20091 vld3_p8 (const poly8_t * __a)
20093 poly8x8x3_t ret;
20094 __builtin_aarch64_simd_ci __o;
20095 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20096 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20097 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20098 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20099 return ret;
20102 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
20103 vld3_s16 (const int16_t * __a)
20105 int16x4x3_t ret;
20106 __builtin_aarch64_simd_ci __o;
20107 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20108 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20109 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20110 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20111 return ret;
20114 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
20115 vld3_p16 (const poly16_t * __a)
20117 poly16x4x3_t ret;
20118 __builtin_aarch64_simd_ci __o;
20119 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20120 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20121 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20122 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20123 return ret;
20126 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
20127 vld3_s32 (const int32_t * __a)
20129 int32x2x3_t ret;
20130 __builtin_aarch64_simd_ci __o;
20131 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20132 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20133 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20134 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20135 return ret;
20138 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
20139 vld3_u8 (const uint8_t * __a)
20141 uint8x8x3_t ret;
20142 __builtin_aarch64_simd_ci __o;
20143 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20144 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20145 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20146 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20147 return ret;
20150 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
20151 vld3_u16 (const uint16_t * __a)
20153 uint16x4x3_t ret;
20154 __builtin_aarch64_simd_ci __o;
20155 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20156 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20157 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20158 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20159 return ret;
20162 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
20163 vld3_u32 (const uint32_t * __a)
20165 uint32x2x3_t ret;
20166 __builtin_aarch64_simd_ci __o;
20167 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20168 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20169 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20170 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20171 return ret;
20174 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
20175 vld3_f32 (const float32_t * __a)
20177 float32x2x3_t ret;
20178 __builtin_aarch64_simd_ci __o;
20179 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
20180 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
20181 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
20182 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
20183 return ret;
20186 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
20187 vld3q_s8 (const int8_t * __a)
20189 int8x16x3_t ret;
20190 __builtin_aarch64_simd_ci __o;
20191 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20192 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20193 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20194 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20195 return ret;
20198 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
20199 vld3q_p8 (const poly8_t * __a)
20201 poly8x16x3_t ret;
20202 __builtin_aarch64_simd_ci __o;
20203 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20204 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20205 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20206 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20207 return ret;
20210 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
20211 vld3q_s16 (const int16_t * __a)
20213 int16x8x3_t ret;
20214 __builtin_aarch64_simd_ci __o;
20215 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20216 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20217 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20218 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20219 return ret;
20222 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
20223 vld3q_p16 (const poly16_t * __a)
20225 poly16x8x3_t ret;
20226 __builtin_aarch64_simd_ci __o;
20227 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20228 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20229 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20230 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20231 return ret;
20234 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
20235 vld3q_s32 (const int32_t * __a)
20237 int32x4x3_t ret;
20238 __builtin_aarch64_simd_ci __o;
20239 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20240 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20241 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20242 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20243 return ret;
20246 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
20247 vld3q_s64 (const int64_t * __a)
20249 int64x2x3_t ret;
20250 __builtin_aarch64_simd_ci __o;
20251 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20252 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20253 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20254 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20255 return ret;
20258 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
20259 vld3q_u8 (const uint8_t * __a)
20261 uint8x16x3_t ret;
20262 __builtin_aarch64_simd_ci __o;
20263 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20264 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20265 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20266 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20267 return ret;
20270 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
20271 vld3q_u16 (const uint16_t * __a)
20273 uint16x8x3_t ret;
20274 __builtin_aarch64_simd_ci __o;
20275 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20276 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20277 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20278 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20279 return ret;
20282 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
20283 vld3q_u32 (const uint32_t * __a)
20285 uint32x4x3_t ret;
20286 __builtin_aarch64_simd_ci __o;
20287 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20288 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20289 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20290 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20291 return ret;
20294 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
20295 vld3q_u64 (const uint64_t * __a)
20297 uint64x2x3_t ret;
20298 __builtin_aarch64_simd_ci __o;
20299 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20300 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20301 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20302 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20303 return ret;
20306 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
20307 vld3q_f32 (const float32_t * __a)
20309 float32x4x3_t ret;
20310 __builtin_aarch64_simd_ci __o;
20311 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
20312 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
20313 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
20314 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
20315 return ret;
20318 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
20319 vld3q_f64 (const float64_t * __a)
20321 float64x2x3_t ret;
20322 __builtin_aarch64_simd_ci __o;
20323 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
20324 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
20325 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
20326 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
20327 return ret;
20330 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
20331 vld4_s64 (const int64_t * __a)
20333 int64x1x4_t ret;
20334 __builtin_aarch64_simd_xi __o;
20335 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20336 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20337 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20338 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20339 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20340 return ret;
20343 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
20344 vld4_u64 (const uint64_t * __a)
20346 uint64x1x4_t ret;
20347 __builtin_aarch64_simd_xi __o;
20348 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20349 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20350 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20351 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20352 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20353 return ret;
20356 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
20357 vld4_f64 (const float64_t * __a)
20359 float64x1x4_t ret;
20360 __builtin_aarch64_simd_xi __o;
20361 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
20362 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
20363 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
20364 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
20365 ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
20366 return ret;
20369 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
20370 vld4_s8 (const int8_t * __a)
20372 int8x8x4_t ret;
20373 __builtin_aarch64_simd_xi __o;
20374 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20375 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20376 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20377 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20378 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20379 return ret;
20382 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
20383 vld4_p8 (const poly8_t * __a)
20385 poly8x8x4_t ret;
20386 __builtin_aarch64_simd_xi __o;
20387 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20388 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20389 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20390 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20391 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20392 return ret;
20395 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
20396 vld4_s16 (const int16_t * __a)
20398 int16x4x4_t ret;
20399 __builtin_aarch64_simd_xi __o;
20400 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20401 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20402 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20403 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20404 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20405 return ret;
20408 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
20409 vld4_p16 (const poly16_t * __a)
20411 poly16x4x4_t ret;
20412 __builtin_aarch64_simd_xi __o;
20413 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20414 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20415 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20416 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20417 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20418 return ret;
20421 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
20422 vld4_s32 (const int32_t * __a)
20424 int32x2x4_t ret;
20425 __builtin_aarch64_simd_xi __o;
20426 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
20427 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
20428 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
20429 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
20430 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
20431 return ret;
20434 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
20435 vld4_u8 (const uint8_t * __a)
20437 uint8x8x4_t ret;
20438 __builtin_aarch64_simd_xi __o;
20439 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20440 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20441 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20442 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20443 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20444 return ret;
20447 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
20448 vld4_u16 (const uint16_t * __a)
20450 uint16x4x4_t ret;
20451 __builtin_aarch64_simd_xi __o;
20452 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20453 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20454 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20455 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20456 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20457 return ret;
20460 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
20461 vld4_u32 (const uint32_t * __a)
20463 uint32x2x4_t ret;
20464 __builtin_aarch64_simd_xi __o;
20465 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
20466 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
20467 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
20468 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
20469 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
20470 return ret;
20473 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
20474 vld4_f32 (const float32_t * __a)
20476 float32x2x4_t ret;
20477 __builtin_aarch64_simd_xi __o;
20478 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
20479 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
20480 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
20481 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
20482 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
20483 return ret;
20486 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
20487 vld4q_s8 (const int8_t * __a)
20489 int8x16x4_t ret;
20490 __builtin_aarch64_simd_xi __o;
20491 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20492 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20493 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20494 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20495 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20496 return ret;
20499 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
20500 vld4q_p8 (const poly8_t * __a)
20502 poly8x16x4_t ret;
20503 __builtin_aarch64_simd_xi __o;
20504 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20505 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20506 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20507 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20508 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20509 return ret;
20512 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
20513 vld4q_s16 (const int16_t * __a)
20515 int16x8x4_t ret;
20516 __builtin_aarch64_simd_xi __o;
20517 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20518 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20519 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20520 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20521 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20522 return ret;
20525 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
20526 vld4q_p16 (const poly16_t * __a)
20528 poly16x8x4_t ret;
20529 __builtin_aarch64_simd_xi __o;
20530 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20531 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20532 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20533 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20534 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20535 return ret;
20538 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
20539 vld4q_s32 (const int32_t * __a)
20541 int32x4x4_t ret;
20542 __builtin_aarch64_simd_xi __o;
20543 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
20544 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
20545 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
20546 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
20547 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
20548 return ret;
20551 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
20552 vld4q_s64 (const int64_t * __a)
20554 int64x2x4_t ret;
20555 __builtin_aarch64_simd_xi __o;
20556 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
20557 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
20558 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
20559 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
20560 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
20561 return ret;
20564 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
20565 vld4q_u8 (const uint8_t * __a)
20567 uint8x16x4_t ret;
20568 __builtin_aarch64_simd_xi __o;
20569 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20570 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20571 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20572 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20573 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20574 return ret;
20577 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
20578 vld4q_u16 (const uint16_t * __a)
20580 uint16x8x4_t ret;
20581 __builtin_aarch64_simd_xi __o;
20582 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20583 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20584 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20585 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20586 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20587 return ret;
20590 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
20591 vld4q_u32 (const uint32_t * __a)
20593 uint32x4x4_t ret;
20594 __builtin_aarch64_simd_xi __o;
20595 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
20596 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
20597 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
20598 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
20599 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
20600 return ret;
20603 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
20604 vld4q_u64 (const uint64_t * __a)
20606 uint64x2x4_t ret;
20607 __builtin_aarch64_simd_xi __o;
20608 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
20609 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
20610 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
20611 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
20612 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
20613 return ret;
20616 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
20617 vld4q_f32 (const float32_t * __a)
20619 float32x4x4_t ret;
20620 __builtin_aarch64_simd_xi __o;
20621 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
20622 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
20623 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
20624 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
20625 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
20626 return ret;
20629 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
20630 vld4q_f64 (const float64_t * __a)
20632 float64x2x4_t ret;
20633 __builtin_aarch64_simd_xi __o;
20634 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
20635 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
20636 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
20637 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
20638 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
20639 return ret;
20642 /* vmax */
20644 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20645 vmax_f32 (float32x2_t __a, float32x2_t __b)
20647 return __builtin_aarch64_smax_nanv2sf (__a, __b);
20650 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20651 vmax_s8 (int8x8_t __a, int8x8_t __b)
20653 return __builtin_aarch64_smaxv8qi (__a, __b);
20656 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20657 vmax_s16 (int16x4_t __a, int16x4_t __b)
20659 return __builtin_aarch64_smaxv4hi (__a, __b);
20662 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20663 vmax_s32 (int32x2_t __a, int32x2_t __b)
20665 return __builtin_aarch64_smaxv2si (__a, __b);
20668 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20669 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
20671 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
20672 (int8x8_t) __b);
20675 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20676 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
20678 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
20679 (int16x4_t) __b);
20682 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20683 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
20685 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
20686 (int32x2_t) __b);
20689 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20690 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
20692 return __builtin_aarch64_smax_nanv4sf (__a, __b);
20695 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20696 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
20698 return __builtin_aarch64_smax_nanv2df (__a, __b);
20701 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20702 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
20704 return __builtin_aarch64_smaxv16qi (__a, __b);
20707 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20708 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
20710 return __builtin_aarch64_smaxv8hi (__a, __b);
20713 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20714 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
20716 return __builtin_aarch64_smaxv4si (__a, __b);
20719 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20720 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
20722 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
20723 (int8x16_t) __b);
20726 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20727 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
20729 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
20730 (int16x8_t) __b);
20733 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20734 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
20736 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
20737 (int32x4_t) __b);
20740 /* vmaxnm */
20742 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20743 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
20745 return __builtin_aarch64_smaxv2sf (__a, __b);
20748 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20749 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
20751 return __builtin_aarch64_smaxv4sf (__a, __b);
20754 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20755 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
20757 return __builtin_aarch64_smaxv2df (__a, __b);
20760 /* vmaxv */
20762 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20763 vmaxv_f32 (float32x2_t __a)
20765 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0);
20768 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20769 vmaxv_s8 (int8x8_t __a)
20771 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
20774 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20775 vmaxv_s16 (int16x4_t __a)
20777 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
20780 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20781 vmaxv_s32 (int32x2_t __a)
20783 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
20786 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20787 vmaxv_u8 (uint8x8_t __a)
20789 return vget_lane_u8 ((uint8x8_t)
20790 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0);
20793 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20794 vmaxv_u16 (uint16x4_t __a)
20796 return vget_lane_u16 ((uint16x4_t)
20797 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0);
20800 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20801 vmaxv_u32 (uint32x2_t __a)
20803 return vget_lane_u32 ((uint32x2_t)
20804 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0);
20807 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20808 vmaxvq_f32 (float32x4_t __a)
20810 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0);
20813 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20814 vmaxvq_f64 (float64x2_t __a)
20816 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0);
20819 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20820 vmaxvq_s8 (int8x16_t __a)
20822 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
20825 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20826 vmaxvq_s16 (int16x8_t __a)
20828 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
20831 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20832 vmaxvq_s32 (int32x4_t __a)
20834 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
20837 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20838 vmaxvq_u8 (uint8x16_t __a)
20840 return vgetq_lane_u8 ((uint8x16_t)
20841 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0);
20844 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20845 vmaxvq_u16 (uint16x8_t __a)
20847 return vgetq_lane_u16 ((uint16x8_t)
20848 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0);
20851 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20852 vmaxvq_u32 (uint32x4_t __a)
20854 return vgetq_lane_u32 ((uint32x4_t)
20855 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0);
20858 /* vmaxnmv */
20860 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20861 vmaxnmv_f32 (float32x2_t __a)
20863 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0);
20866 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20867 vmaxnmvq_f32 (float32x4_t __a)
20869 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
20872 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20873 vmaxnmvq_f64 (float64x2_t __a)
20875 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
20878 /* vmin */
20880 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20881 vmin_f32 (float32x2_t __a, float32x2_t __b)
20883 return __builtin_aarch64_smin_nanv2sf (__a, __b);
20886 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20887 vmin_s8 (int8x8_t __a, int8x8_t __b)
20889 return __builtin_aarch64_sminv8qi (__a, __b);
20892 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20893 vmin_s16 (int16x4_t __a, int16x4_t __b)
20895 return __builtin_aarch64_sminv4hi (__a, __b);
20898 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20899 vmin_s32 (int32x2_t __a, int32x2_t __b)
20901 return __builtin_aarch64_sminv2si (__a, __b);
20904 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20905 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
20907 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
20908 (int8x8_t) __b);
20911 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20912 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
20914 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
20915 (int16x4_t) __b);
20918 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20919 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
20921 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
20922 (int32x2_t) __b);
20925 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20926 vminq_f32 (float32x4_t __a, float32x4_t __b)
20928 return __builtin_aarch64_smin_nanv4sf (__a, __b);
20931 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20932 vminq_f64 (float64x2_t __a, float64x2_t __b)
20934 return __builtin_aarch64_smin_nanv2df (__a, __b);
20937 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20938 vminq_s8 (int8x16_t __a, int8x16_t __b)
20940 return __builtin_aarch64_sminv16qi (__a, __b);
20943 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20944 vminq_s16 (int16x8_t __a, int16x8_t __b)
20946 return __builtin_aarch64_sminv8hi (__a, __b);
20949 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20950 vminq_s32 (int32x4_t __a, int32x4_t __b)
20952 return __builtin_aarch64_sminv4si (__a, __b);
20955 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20956 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
20958 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
20959 (int8x16_t) __b);
20962 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20963 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
20965 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
20966 (int16x8_t) __b);
20969 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20970 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
20972 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
20973 (int32x4_t) __b);
20976 /* vminnm */
20978 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20979 vminnm_f32 (float32x2_t __a, float32x2_t __b)
20981 return __builtin_aarch64_sminv2sf (__a, __b);
20984 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20985 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
20987 return __builtin_aarch64_sminv4sf (__a, __b);
20990 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20991 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
20993 return __builtin_aarch64_sminv2df (__a, __b);
20996 /* vminv */
20998 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20999 vminv_f32 (float32x2_t __a)
21001 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0);
21004 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21005 vminv_s8 (int8x8_t __a)
21007 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0);
21010 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21011 vminv_s16 (int16x4_t __a)
21013 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
21016 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21017 vminv_s32 (int32x2_t __a)
21019 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
21022 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21023 vminv_u8 (uint8x8_t __a)
21025 return vget_lane_u8 ((uint8x8_t)
21026 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0);
21029 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21030 vminv_u16 (uint16x4_t __a)
21032 return vget_lane_u16 ((uint16x4_t)
21033 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0);
21036 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21037 vminv_u32 (uint32x2_t __a)
21039 return vget_lane_u32 ((uint32x2_t)
21040 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0);
21043 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21044 vminvq_f32 (float32x4_t __a)
21046 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0);
21049 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21050 vminvq_f64 (float64x2_t __a)
21052 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0);
21055 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21056 vminvq_s8 (int8x16_t __a)
21058 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
21061 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21062 vminvq_s16 (int16x8_t __a)
21064 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
21067 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21068 vminvq_s32 (int32x4_t __a)
21070 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
21073 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21074 vminvq_u8 (uint8x16_t __a)
21076 return vgetq_lane_u8 ((uint8x16_t)
21077 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0);
21080 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21081 vminvq_u16 (uint16x8_t __a)
21083 return vgetq_lane_u16 ((uint16x8_t)
21084 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0);
21087 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21088 vminvq_u32 (uint32x4_t __a)
21090 return vgetq_lane_u32 ((uint32x4_t)
21091 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0);
21094 /* vminnmv */
21096 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21097 vminnmv_f32 (float32x2_t __a)
21099 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
21102 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21103 vminnmvq_f32 (float32x4_t __a)
21105 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
21108 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21109 vminnmvq_f64 (float64x2_t __a)
21111 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
21114 /* vmla */
21116 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21117 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21119 return a + b * c;
21122 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21123 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21125 return a + b * c;
21128 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21129 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21131 return a + b * c;
21134 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21135 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21137 return a - b * c;
21140 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21141 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21143 return a - b * c;
21146 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21147 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21149 return a - b * c;
21152 /* vmul_lane */
21154 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21155 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
21157 return __a * __aarch64_vget_lane_f32 (__b, __lane);
21160 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21161 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
21163 return __a * __b;
21166 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21167 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
21169 return __a * __aarch64_vget_lane_s16 (__b, __lane);
21172 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21173 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
21175 return __a * __aarch64_vget_lane_s32 (__b, __lane);
21178 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21179 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
21181 return __a * __aarch64_vget_lane_u16 (__b, __lane);
21184 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21185 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
21187 return __a * __aarch64_vget_lane_u32 (__b, __lane);
21190 /* vmul_laneq */
21192 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21193 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
21195 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
21198 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21199 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
21201 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
21204 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21205 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
21207 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
21210 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21211 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
21213 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
21216 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21217 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
21219 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
21222 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21223 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
21225 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
21228 /* vmulq_lane */
21230 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21231 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
21233 return __a * __aarch64_vget_lane_f32 (__b, __lane);
21236 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21237 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
21239 return __a * __b;
21242 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21243 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
21245 return __a * __aarch64_vget_lane_s16 (__b, __lane);
21248 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21249 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
21251 return __a * __aarch64_vget_lane_s32 (__b, __lane);
21254 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21255 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
21257 return __a * __aarch64_vget_lane_u16 (__b, __lane);
21260 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21261 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
21263 return __a * __aarch64_vget_lane_u32 (__b, __lane);
21266 /* vmulq_laneq */
21268 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21269 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
21271 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
21274 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21275 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
21277 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
21280 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21281 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
21283 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
21286 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21287 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
21289 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
21292 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21293 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
21295 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
21298 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21299 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
21301 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
21304 /* vqabs */
21306 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21307 vqabsq_s64 (int64x2_t __a)
21309 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
21312 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21313 vqabsb_s8 (int8x1_t __a)
21315 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
21318 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21319 vqabsh_s16 (int16x1_t __a)
21321 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
21324 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21325 vqabss_s32 (int32x1_t __a)
21327 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
21330 /* vqadd */
21332 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21333 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
21335 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
21338 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21339 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
21341 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
21344 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21345 vqadds_s32 (int32x1_t __a, int32x1_t __b)
21347 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
21350 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21351 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
21353 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
21356 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21357 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
21359 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
21362 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21363 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
21365 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
21368 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21369 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
21371 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
21374 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21375 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
21377 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
21380 /* vqdmlal */
21382 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21383 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21385 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21388 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21389 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21391 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21394 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21395 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21396 int const __d)
21398 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21401 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21402 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21403 int const __d)
21405 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21408 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21409 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21411 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21414 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21415 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21417 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
21418 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
21421 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21422 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21424 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21427 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21428 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21430 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21433 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21434 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21436 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21439 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21440 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21442 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21445 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21446 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21447 int const __d)
21449 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21452 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21453 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21454 int const __d)
21456 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21459 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21460 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21462 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21465 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21466 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21468 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
21469 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
21472 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21473 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21475 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21478 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21479 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21481 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21484 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21485 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21487 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21490 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21491 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21493 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21496 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21497 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21499 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21502 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21503 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21505 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
/* vqdmlsl: saturating doubling multiply-subtract long (SQDMLSL):
   __a - saturate (2 * __b * __c), widening the product.  Vector,
   high-half (_high, SQDMLSL2), lane, _n (scalar multiplier) and
   scalar-register forms; each is a thin wrapper over the
   corresponding aarch64 builtin.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  /* _high forms use the upper halves of the 128-bit sources.  */
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  /* The builtin expects a 128-bit lane vector, so widen __c with a
     zero-filled upper half; lane index __d still selects from the
     low half.  */
  int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  /* Widen __c to the builtin's q-register mode; see vqdmlsl_lane_s16.  */
  int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
{
  /* NOTE(review): ACLE declares int16x4_t for the lane vector here —
     the q-register type matches the builtin's mode; confirm.  */
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
{
  /* NOTE(review): ACLE declares int32x2_t here — see note above.  */
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}
/* vqdmulh: saturating doubling multiply returning high half (SQDMULH):
   saturate (2 * __a * __b) >> element-width.  Lane and scalar-register
   forms, each a direct wrapper over the aarch64 builtin.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  /* NOTE(review): ACLE declares int16x4_t for the lane vector; the
     q-register type matches the builtin's mode — confirm.  */
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  /* NOTE(review): ACLE declares int32x2_t here — see note above.  */
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
/* vqdmull: saturating doubling multiply long (SQDMULL):
   saturate (2 * __a * __b), widening each product to double width.
   Vector, high-half (_high, SQDMULL2), lane, _n and scalar-register
   forms; each wraps the corresponding aarch64 builtin.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  /* _high forms operate on the upper halves of the 128-bit sources.  */
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  /* The builtin expects a 128-bit lane vector, so widen __b with a
     zero-filled upper half; lane __c still indexes the low half.  */
  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  /* Widen __b to the builtin's q-register mode; see vqdmull_lane_s16.  */
  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  /* NOTE(review): ACLE declares int16x4_t for the lane vector; the
     q-register type matches the builtin's mode — confirm.  */
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  /* NOTE(review): ACLE declares int32x2_t here — see note above.  */
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}
/* vqmovn: saturating extract narrow (SQXTN/UQXTN): narrow each element
   to half width, saturating to the narrow type's range.  The unsigned
   builtins are typed on signed vector modes, hence the casts on their
   arguments and results.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16x1_t __a)
{
  return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32x1_t __a)
{
  return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64x1_t __a)
{
  return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
}
/* vqmovun: saturating extract unsigned narrow (SQXTUN): narrow a
   signed source to half width, saturating into the unsigned range
   [0, 2^n - 1].  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

/* NOTE(review): the scalar forms below return signed int8x1_t /
   int16x1_t / int32x1_t, while ACLE specifies unsigned results for
   vqmovun — matches the builtin's signed mode; confirm against the
   ACLE spec before changing the declared types.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
}
/* vqneg: saturating negate (SQNEG).  Saturates the one value that has
   no positive counterpart: INT*_MIN maps to INT*_MAX.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
}
/* vqrdmulh: saturating rounding doubling multiply returning high half
   (SQRDMULH) — as vqdmulh but with rounding before the high-half
   extraction.  Lane and scalar-register forms.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  /* NOTE(review): ACLE declares int16x4_t for the lane vector; the
     q-register type matches the builtin's mode — confirm.  */
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  /* NOTE(review): ACLE declares int32x2_t here — see note above.  */
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
/* vqrshl: saturating rounding shift left by a signed register value
   (SQRSHL/UQRSHL).  Negative elements of __b shift right with
   rounding; results saturate.  The shift-amount vector is signed even
   for the unsigned-data variants, and the unsigned builtins are typed
   on signed modes — hence the casts.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

/* NOTE(review): the scalar unsigned forms take an unsigned shift
   count, unlike the vector forms above which take a signed one —
   presumably intentional for the builtin's signature; confirm
   against the ACLE.  */

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
}
/* vqrshrn: saturating rounding shift right narrow by immediate
   (SQRSHRN/UQRSHRN): shift right by __b with rounding, then narrow
   with saturation.  __b must be a compile-time constant in
   [1, element width].  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
{
  return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32x1_t __a, const int __b)
{
  return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
}
/* vqrshrun: saturating rounding shift right unsigned narrow by
   immediate (SQRSHRUN): signed source, unsigned saturated narrow
   result.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

/* NOTE(review): the scalar forms below return signed types where ACLE
   specifies unsigned results for vqrshrun — matches the builtin's
   signed mode; confirm before changing the declared types.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}
/* vqshl: saturating shift left by a signed register value
   (SQSHL/UQSHL, register form).  Negative elements of __b shift
   right; results saturate.  Unsigned builtins are typed on signed
   modes, hence the casts.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

/* NOTE(review): the scalar unsigned forms take an unsigned shift
   count, unlike the vector forms above — confirm against the ACLE.  */

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
}
/* vqshl_n: saturating shift left by immediate (SQSHL/UQSHL,
   immediate form).  __b must be a compile-time constant in
   [0, element width - 1]; results saturate on overflow.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8 (uint8x8_t __a, const int __b)
{
  /* Unsigned builtins are typed on signed modes, hence the casts.  */
  return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8x1_t __a, const int __b)
{
  return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16x1_t __a, const int __b)
{
  return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32x1_t __a, const int __b)
{
  return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
}
22546 /* vqshlu */
22548 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22549 vqshlu_n_s8 (int8x8_t __a, const int __b)
22551 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22554 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22555 vqshlu_n_s16 (int16x4_t __a, const int __b)
22557 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22560 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22561 vqshlu_n_s32 (int32x2_t __a, const int __b)
22563 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22566 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22567 vqshlu_n_s64 (int64x1_t __a, const int __b)
22569 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22572 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22573 vqshluq_n_s8 (int8x16_t __a, const int __b)
22575 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22578 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22579 vqshluq_n_s16 (int16x8_t __a, const int __b)
22581 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22584 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22585 vqshluq_n_s32 (int32x4_t __a, const int __b)
22587 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22590 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22591 vqshluq_n_s64 (int64x2_t __a, const int __b)
22593 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22596 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22597 vqshlub_n_s8 (int8x1_t __a, const int __b)
22599 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22602 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22603 vqshluh_n_s16 (int16x1_t __a, const int __b)
22605 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22608 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22609 vqshlus_n_s32 (int32x1_t __a, const int __b)
22611 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22614 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22615 vqshlud_n_s64 (int64x1_t __a, const int __b)
22617 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22620 /* vqshrn */
22622 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22623 vqshrn_n_s16 (int16x8_t __a, const int __b)
22625 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22628 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22629 vqshrn_n_s32 (int32x4_t __a, const int __b)
22631 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22634 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22635 vqshrn_n_s64 (int64x2_t __a, const int __b)
22637 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22640 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22641 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22643 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22646 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22647 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22649 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22653 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22655 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22658 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22659 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22661 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22664 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22665 vqshrns_n_s32 (int32x1_t __a, const int __b)
22667 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22670 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22671 vqshrnd_n_s64 (int64x1_t __a, const int __b)
22673 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22676 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22677 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22679 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22682 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22683 vqshrns_n_u32 (uint32x1_t __a, const int __b)
22685 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22688 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22689 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22691 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22694 /* vqshrun */
22696 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22697 vqshrun_n_s16 (int16x8_t __a, const int __b)
22699 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22702 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22703 vqshrun_n_s32 (int32x4_t __a, const int __b)
22705 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22708 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22709 vqshrun_n_s64 (int64x2_t __a, const int __b)
22711 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22714 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22715 vqshrunh_n_s16 (int16x1_t __a, const int __b)
22717 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22720 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22721 vqshruns_n_s32 (int32x1_t __a, const int __b)
22723 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22726 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22727 vqshrund_n_s64 (int64x1_t __a, const int __b)
22729 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22732 /* vqsub */
22734 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22735 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22737 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22740 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22741 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22743 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22746 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22747 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22749 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22752 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22753 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22755 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22758 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22759 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22761 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22764 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22765 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22767 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22770 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22771 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22773 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22776 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22777 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22779 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
22782 /* vrecpe */
22784 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22785 vrecpes_f32 (float32_t __a)
22787 return __builtin_aarch64_frecpesf (__a);
22790 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22791 vrecped_f64 (float64_t __a)
22793 return __builtin_aarch64_frecpedf (__a);
22796 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22797 vrecpe_f32 (float32x2_t __a)
22799 return __builtin_aarch64_frecpev2sf (__a);
22802 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22803 vrecpeq_f32 (float32x4_t __a)
22805 return __builtin_aarch64_frecpev4sf (__a);
22808 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22809 vrecpeq_f64 (float64x2_t __a)
22811 return __builtin_aarch64_frecpev2df (__a);
22814 /* vrecps */
22816 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22817 vrecpss_f32 (float32_t __a, float32_t __b)
22819 return __builtin_aarch64_frecpssf (__a, __b);
22822 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22823 vrecpsd_f64 (float64_t __a, float64_t __b)
22825 return __builtin_aarch64_frecpsdf (__a, __b);
22828 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22829 vrecps_f32 (float32x2_t __a, float32x2_t __b)
22831 return __builtin_aarch64_frecpsv2sf (__a, __b);
22834 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22835 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
22837 return __builtin_aarch64_frecpsv4sf (__a, __b);
22840 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22841 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
22843 return __builtin_aarch64_frecpsv2df (__a, __b);
22846 /* vrecpx */
22848 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22849 vrecpxs_f32 (float32_t __a)
22851 return __builtin_aarch64_frecpxsf (__a);
22854 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22855 vrecpxd_f64 (float64_t __a)
22857 return __builtin_aarch64_frecpxdf (__a);
22860 /* vrnd */
22862 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22863 vrnd_f32 (float32x2_t __a)
22865 return __builtin_aarch64_btruncv2sf (__a);
22868 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22869 vrndq_f32 (float32x4_t __a)
22871 return __builtin_aarch64_btruncv4sf (__a);
22874 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22875 vrndq_f64 (float64x2_t __a)
22877 return __builtin_aarch64_btruncv2df (__a);
22880 /* vrnda */
22882 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22883 vrnda_f32 (float32x2_t __a)
22885 return __builtin_aarch64_roundv2sf (__a);
22888 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22889 vrndaq_f32 (float32x4_t __a)
22891 return __builtin_aarch64_roundv4sf (__a);
22894 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22895 vrndaq_f64 (float64x2_t __a)
22897 return __builtin_aarch64_roundv2df (__a);
22900 /* vrndi */
22902 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22903 vrndi_f32 (float32x2_t __a)
22905 return __builtin_aarch64_nearbyintv2sf (__a);
22908 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22909 vrndiq_f32 (float32x4_t __a)
22911 return __builtin_aarch64_nearbyintv4sf (__a);
22914 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22915 vrndiq_f64 (float64x2_t __a)
22917 return __builtin_aarch64_nearbyintv2df (__a);
22920 /* vrndm */
22922 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22923 vrndm_f32 (float32x2_t __a)
22925 return __builtin_aarch64_floorv2sf (__a);
22928 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22929 vrndmq_f32 (float32x4_t __a)
22931 return __builtin_aarch64_floorv4sf (__a);
22934 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22935 vrndmq_f64 (float64x2_t __a)
22937 return __builtin_aarch64_floorv2df (__a);
22940 /* vrndn */
22942 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22943 vrndn_f32 (float32x2_t __a)
22945 return __builtin_aarch64_frintnv2sf (__a);
22947 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22948 vrndnq_f32 (float32x4_t __a)
22950 return __builtin_aarch64_frintnv4sf (__a);
22953 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22954 vrndnq_f64 (float64x2_t __a)
22956 return __builtin_aarch64_frintnv2df (__a);
22959 /* vrndp */
22961 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22962 vrndp_f32 (float32x2_t __a)
22964 return __builtin_aarch64_ceilv2sf (__a);
22967 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22968 vrndpq_f32 (float32x4_t __a)
22970 return __builtin_aarch64_ceilv4sf (__a);
22973 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22974 vrndpq_f64 (float64x2_t __a)
22976 return __builtin_aarch64_ceilv2df (__a);
22979 /* vrndx */
22981 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22982 vrndx_f32 (float32x2_t __a)
22984 return __builtin_aarch64_rintv2sf (__a);
22987 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22988 vrndxq_f32 (float32x4_t __a)
22990 return __builtin_aarch64_rintv4sf (__a);
22993 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22994 vrndxq_f64 (float64x2_t __a)
22996 return __builtin_aarch64_rintv2df (__a);
22999 /* vrshl */
23001 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23002 vrshl_s8 (int8x8_t __a, int8x8_t __b)
23004 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
23007 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23008 vrshl_s16 (int16x4_t __a, int16x4_t __b)
23010 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
23013 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23014 vrshl_s32 (int32x2_t __a, int32x2_t __b)
23016 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
23019 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23020 vrshl_s64 (int64x1_t __a, int64x1_t __b)
23022 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23025 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23026 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
23028 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
23031 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23032 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
23034 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
23037 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23038 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
23040 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
23043 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23044 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
23046 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
23049 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23050 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
23052 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
23055 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23056 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
23058 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
23061 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23062 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
23064 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
23067 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23068 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
23070 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
23073 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23074 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
23076 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
23079 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23080 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
23082 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
23085 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23086 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
23088 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
23091 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23092 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
23094 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
23097 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23098 vrshld_s64 (int64x1_t __a, int64x1_t __b)
23100 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23103 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23104 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
23106 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
23109 /* vrshr */
23111 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23112 vrshr_n_s8 (int8x8_t __a, const int __b)
23114 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
23117 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23118 vrshr_n_s16 (int16x4_t __a, const int __b)
23120 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
23123 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23124 vrshr_n_s32 (int32x2_t __a, const int __b)
23126 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
23129 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23130 vrshr_n_s64 (int64x1_t __a, const int __b)
23132 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23135 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23136 vrshr_n_u8 (uint8x8_t __a, const int __b)
23138 return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
23141 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23142 vrshr_n_u16 (uint16x4_t __a, const int __b)
23144 return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
23147 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23148 vrshr_n_u32 (uint32x2_t __a, const int __b)
23150 return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
23153 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23154 vrshr_n_u64 (uint64x1_t __a, const int __b)
23156 return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
23159 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23160 vrshrq_n_s8 (int8x16_t __a, const int __b)
23162 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
23165 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23166 vrshrq_n_s16 (int16x8_t __a, const int __b)
23168 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
23171 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23172 vrshrq_n_s32 (int32x4_t __a, const int __b)
23174 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
23177 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23178 vrshrq_n_s64 (int64x2_t __a, const int __b)
23180 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
23183 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23184 vrshrq_n_u8 (uint8x16_t __a, const int __b)
23186 return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
23189 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23190 vrshrq_n_u16 (uint16x8_t __a, const int __b)
23192 return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
23195 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23196 vrshrq_n_u32 (uint32x4_t __a, const int __b)
23198 return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
23201 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23202 vrshrq_n_u64 (uint64x2_t __a, const int __b)
23204 return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
23207 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23208 vrshrd_n_s64 (int64x1_t __a, const int __b)
23210 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23213 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23214 vrshrd_n_u64 (uint64x1_t __a, const int __b)
23216 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
23219 /* vrsra */
23221 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23222 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23224 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
23227 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23228 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23230 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
23233 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23234 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23236 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
23239 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23240 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23242 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23245 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23246 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23248 return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
23249 (int8x8_t) __b, __c);
23252 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23253 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23255 return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
23256 (int16x4_t) __b, __c);
23259 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23260 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23262 return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
23263 (int32x2_t) __b, __c);
23266 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23267 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23269 return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
23270 (int64x1_t) __b, __c);
23273 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23274 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23276 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
23279 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23280 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23282 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
23285 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23286 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23288 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
23291 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23292 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23294 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
23297 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23298 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23300 return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
23301 (int8x16_t) __b, __c);
23304 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23305 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23307 return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
23308 (int16x8_t) __b, __c);
23311 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23312 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23314 return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
23315 (int32x4_t) __b, __c);
23318 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23319 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23321 return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
23322 (int64x2_t) __b, __c);
23325 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23326 vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23328 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23331 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23332 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23334 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
23337 /* vshl */
23339 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23340 vshl_n_s8 (int8x8_t __a, const int __b)
23342 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
23345 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23346 vshl_n_s16 (int16x4_t __a, const int __b)
23348 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
23351 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23352 vshl_n_s32 (int32x2_t __a, const int __b)
23354 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
23357 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23358 vshl_n_s64 (int64x1_t __a, const int __b)
23360 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23363 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23364 vshl_n_u8 (uint8x8_t __a, const int __b)
23366 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
23369 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23370 vshl_n_u16 (uint16x4_t __a, const int __b)
23372 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
23375 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23376 vshl_n_u32 (uint32x2_t __a, const int __b)
23378 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
23381 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23382 vshl_n_u64 (uint64x1_t __a, const int __b)
23384 return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
23387 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23388 vshlq_n_s8 (int8x16_t __a, const int __b)
23390 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
23393 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23394 vshlq_n_s16 (int16x8_t __a, const int __b)
23396 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
23399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23400 vshlq_n_s32 (int32x4_t __a, const int __b)
23402 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
23405 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23406 vshlq_n_s64 (int64x2_t __a, const int __b)
23408 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
23411 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23412 vshlq_n_u8 (uint8x16_t __a, const int __b)
23414 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
23417 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23418 vshlq_n_u16 (uint16x8_t __a, const int __b)
23420 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
23423 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23424 vshlq_n_u32 (uint32x4_t __a, const int __b)
23426 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
23429 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23430 vshlq_n_u64 (uint64x2_t __a, const int __b)
23432 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
23435 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23436 vshld_n_s64 (int64x1_t __a, const int __b)
23438 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23441 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23442 vshld_n_u64 (uint64x1_t __a, const int __b)
23444 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
23447 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23448 vshl_s8 (int8x8_t __a, int8x8_t __b)
23450 return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23453 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23454 vshl_s16 (int16x4_t __a, int16x4_t __b)
23456 return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23459 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23460 vshl_s32 (int32x2_t __a, int32x2_t __b)
23462 return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23465 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23466 vshl_s64 (int64x1_t __a, int64x1_t __b)
23468 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23471 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23472 vshl_u8 (uint8x8_t __a, int8x8_t __b)
23474 return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23477 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23478 vshl_u16 (uint16x4_t __a, int16x4_t __b)
23480 return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23483 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23484 vshl_u32 (uint32x2_t __a, int32x2_t __b)
23486 return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23489 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23490 vshl_u64 (uint64x1_t __a, int64x1_t __b)
23492 return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23495 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23496 vshlq_s8 (int8x16_t __a, int8x16_t __b)
23498 return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23501 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23502 vshlq_s16 (int16x8_t __a, int16x8_t __b)
23504 return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23507 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23508 vshlq_s32 (int32x4_t __a, int32x4_t __b)
23510 return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23513 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23514 vshlq_s64 (int64x2_t __a, int64x2_t __b)
23516 return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23519 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23520 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23522 return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23525 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23526 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23528 return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23531 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23532 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23534 return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23537 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23538 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23540 return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23543 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23544 vshld_s64 (int64x1_t __a, int64x1_t __b)
23546 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23549 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23550 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23552 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23555 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23556 vshll_high_n_s8 (int8x16_t __a, const int __b)
23558 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23561 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23562 vshll_high_n_s16 (int16x8_t __a, const int __b)
23564 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23567 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23568 vshll_high_n_s32 (int32x4_t __a, const int __b)
23570 return __builtin_aarch64_sshll2_nv4si (__a, __b);
23573 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23574 vshll_high_n_u8 (uint8x16_t __a, const int __b)
23576 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23579 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23580 vshll_high_n_u16 (uint16x8_t __a, const int __b)
23582 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23585 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23586 vshll_high_n_u32 (uint32x4_t __a, const int __b)
23588 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23591 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23592 vshll_n_s8 (int8x8_t __a, const int __b)
23594 return __builtin_aarch64_sshll_nv8qi (__a, __b);
23597 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23598 vshll_n_s16 (int16x4_t __a, const int __b)
23600 return __builtin_aarch64_sshll_nv4hi (__a, __b);
23603 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23604 vshll_n_s32 (int32x2_t __a, const int __b)
23606 return __builtin_aarch64_sshll_nv2si (__a, __b);
23609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23610 vshll_n_u8 (uint8x8_t __a, const int __b)
23612 return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23615 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23616 vshll_n_u16 (uint16x4_t __a, const int __b)
23618 return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23621 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23622 vshll_n_u32 (uint32x2_t __a, const int __b)
23624 return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
23627 /* vshr */
23629 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23630 vshr_n_s8 (int8x8_t __a, const int __b)
23632 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
23635 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23636 vshr_n_s16 (int16x4_t __a, const int __b)
23638 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
23641 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23642 vshr_n_s32 (int32x2_t __a, const int __b)
23644 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
23647 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23648 vshr_n_s64 (int64x1_t __a, const int __b)
23650 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23653 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23654 vshr_n_u8 (uint8x8_t __a, const int __b)
23656 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
23659 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23660 vshr_n_u16 (uint16x4_t __a, const int __b)
23662 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
23665 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23666 vshr_n_u32 (uint32x2_t __a, const int __b)
23668 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
23671 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23672 vshr_n_u64 (uint64x1_t __a, const int __b)
23674 return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
23677 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23678 vshrq_n_s8 (int8x16_t __a, const int __b)
23680 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
23683 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23684 vshrq_n_s16 (int16x8_t __a, const int __b)
23686 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
23689 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23690 vshrq_n_s32 (int32x4_t __a, const int __b)
23692 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
23695 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23696 vshrq_n_s64 (int64x2_t __a, const int __b)
23698 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
23701 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23702 vshrq_n_u8 (uint8x16_t __a, const int __b)
23704 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
23707 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23708 vshrq_n_u16 (uint16x8_t __a, const int __b)
23710 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
23713 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23714 vshrq_n_u32 (uint32x4_t __a, const int __b)
23716 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
23719 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23720 vshrq_n_u64 (uint64x2_t __a, const int __b)
23722 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
23725 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23726 vshrd_n_s64 (int64x1_t __a, const int __b)
23728 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23731 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23732 vshrd_n_u64 (uint64x1_t __a, const int __b)
23734 return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
23737 /* vsli */
23739 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23740 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23742 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
23745 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23746 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23748 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
23751 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23752 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23754 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
23757 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23758 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23760 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23763 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23764 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23766 return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
23767 (int8x8_t) __b, __c);
23770 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23771 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23773 return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
23774 (int16x4_t) __b, __c);
23777 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23778 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23780 return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
23781 (int32x2_t) __b, __c);
23784 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23785 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23787 return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
23788 (int64x1_t) __b, __c);
23791 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23792 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23794 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
23797 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23798 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23800 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
23803 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23804 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23806 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
23809 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23810 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23812 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
23815 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23816 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23818 return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
23819 (int8x16_t) __b, __c);
23822 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23823 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23825 return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
23826 (int16x8_t) __b, __c);
23829 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23830 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23832 return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
23833 (int32x4_t) __b, __c);
23836 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23837 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23839 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23840 (int64x2_t) __b, __c);
23843 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23844 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23846 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23849 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23850 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23852 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23855 /* vsqadd */
23857 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23858 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23860 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23861 (int8x8_t) __b);
23864 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23865 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23867 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23868 (int16x4_t) __b);
23871 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23872 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23874 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23875 (int32x2_t) __b);
23878 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23879 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23881 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23884 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23885 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
23887 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
23888 (int8x16_t) __b);
23891 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23892 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
23894 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
23895 (int16x8_t) __b);
23898 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23899 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
23901 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
23902 (int32x4_t) __b);
23905 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23906 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
23908 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
23909 (int64x2_t) __b);
23912 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
23913 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
23915 return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
23918 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
23919 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
23921 return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
23924 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
23925 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
23927 return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
23930 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23931 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
23933 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23936 /* vsqrt */
23937 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23938 vsqrt_f32 (float32x2_t a)
23940 return __builtin_aarch64_sqrtv2sf (a);
23943 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23944 vsqrtq_f32 (float32x4_t a)
23946 return __builtin_aarch64_sqrtv4sf (a);
23949 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23950 vsqrtq_f64 (float64x2_t a)
23952 return __builtin_aarch64_sqrtv2df (a);
23955 /* vsra */
23957 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23958 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23960 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
23963 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23964 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23966 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
23969 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23970 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23972 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
23975 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23976 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23978 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23981 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23982 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23984 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
23985 (int8x8_t) __b, __c);
23988 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23989 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23991 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
23992 (int16x4_t) __b, __c);
23995 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23996 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23998 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
23999 (int32x2_t) __b, __c);
24002 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24003 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24005 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
24006 (int64x1_t) __b, __c);
24009 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24010 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24012 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
24015 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24016 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24018 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
24021 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24022 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24024 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
24027 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24028 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24030 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
24033 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24034 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24036 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
24037 (int8x16_t) __b, __c);
24040 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24041 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24043 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
24044 (int16x8_t) __b, __c);
24047 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24048 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24050 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
24051 (int32x4_t) __b, __c);
24054 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24055 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24057 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
24058 (int64x2_t) __b, __c);
24061 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24062 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24064 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
24067 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24068 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24070 return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
24073 /* vsri */
24075 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24076 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
24078 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
24081 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24082 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
24084 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
24087 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24088 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
24090 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
24093 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24094 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24096 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24099 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24100 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
24102 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
24103 (int8x8_t) __b, __c);
24106 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24107 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24109 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
24110 (int16x4_t) __b, __c);
24113 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24114 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24116 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
24117 (int32x2_t) __b, __c);
24120 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24121 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24123 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
24124 (int64x1_t) __b, __c);
24127 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24128 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24130 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
24133 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24134 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24136 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
24139 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24140 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24142 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
24145 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24146 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24148 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
24151 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24152 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24154 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
24155 (int8x16_t) __b, __c);
24158 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24159 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24161 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
24162 (int16x8_t) __b, __c);
24165 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24166 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24168 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
24169 (int32x4_t) __b, __c);
24172 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24173 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24175 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
24176 (int64x2_t) __b, __c);
24179 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24180 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24182 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24185 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24186 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24188 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
24191 /* vst1 */
24193 __extension__ static __inline void __attribute__ ((__always_inline__))
24194 vst1_f32 (float32_t *a, float32x2_t b)
24196 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
24199 __extension__ static __inline void __attribute__ ((__always_inline__))
24200 vst1_f64 (float64_t *a, float64x1_t b)
24202 *a = b;
24205 __extension__ static __inline void __attribute__ ((__always_inline__))
24206 vst1_p8 (poly8_t *a, poly8x8_t b)
24208 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24209 (int8x8_t) b);
24212 __extension__ static __inline void __attribute__ ((__always_inline__))
24213 vst1_p16 (poly16_t *a, poly16x4_t b)
24215 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24216 (int16x4_t) b);
24219 __extension__ static __inline void __attribute__ ((__always_inline__))
24220 vst1_s8 (int8_t *a, int8x8_t b)
24222 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
24225 __extension__ static __inline void __attribute__ ((__always_inline__))
24226 vst1_s16 (int16_t *a, int16x4_t b)
24228 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
24231 __extension__ static __inline void __attribute__ ((__always_inline__))
24232 vst1_s32 (int32_t *a, int32x2_t b)
24234 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
24237 __extension__ static __inline void __attribute__ ((__always_inline__))
24238 vst1_s64 (int64_t *a, int64x1_t b)
24240 *a = b;
24243 __extension__ static __inline void __attribute__ ((__always_inline__))
24244 vst1_u8 (uint8_t *a, uint8x8_t b)
24246 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24247 (int8x8_t) b);
24250 __extension__ static __inline void __attribute__ ((__always_inline__))
24251 vst1_u16 (uint16_t *a, uint16x4_t b)
24253 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24254 (int16x4_t) b);
24257 __extension__ static __inline void __attribute__ ((__always_inline__))
24258 vst1_u32 (uint32_t *a, uint32x2_t b)
24260 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
24261 (int32x2_t) b);
24264 __extension__ static __inline void __attribute__ ((__always_inline__))
24265 vst1_u64 (uint64_t *a, uint64x1_t b)
24267 *a = b;
24270 __extension__ static __inline void __attribute__ ((__always_inline__))
24271 vst1q_f32 (float32_t *a, float32x4_t b)
24273 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
24276 __extension__ static __inline void __attribute__ ((__always_inline__))
24277 vst1q_f64 (float64_t *a, float64x2_t b)
24279 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
24282 /* vst1q */
24284 __extension__ static __inline void __attribute__ ((__always_inline__))
24285 vst1q_p8 (poly8_t *a, poly8x16_t b)
24287 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24288 (int8x16_t) b);
24291 __extension__ static __inline void __attribute__ ((__always_inline__))
24292 vst1q_p16 (poly16_t *a, poly16x8_t b)
24294 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24295 (int16x8_t) b);
24298 __extension__ static __inline void __attribute__ ((__always_inline__))
24299 vst1q_s8 (int8_t *a, int8x16_t b)
24301 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
24304 __extension__ static __inline void __attribute__ ((__always_inline__))
24305 vst1q_s16 (int16_t *a, int16x8_t b)
24307 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
24310 __extension__ static __inline void __attribute__ ((__always_inline__))
24311 vst1q_s32 (int32_t *a, int32x4_t b)
24313 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
24316 __extension__ static __inline void __attribute__ ((__always_inline__))
24317 vst1q_s64 (int64_t *a, int64x2_t b)
24319 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
24322 __extension__ static __inline void __attribute__ ((__always_inline__))
24323 vst1q_u8 (uint8_t *a, uint8x16_t b)
24325 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24326 (int8x16_t) b);
24329 __extension__ static __inline void __attribute__ ((__always_inline__))
24330 vst1q_u16 (uint16_t *a, uint16x8_t b)
24332 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24333 (int16x8_t) b);
24336 __extension__ static __inline void __attribute__ ((__always_inline__))
24337 vst1q_u32 (uint32_t *a, uint32x4_t b)
24339 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
24340 (int32x4_t) b);
24343 __extension__ static __inline void __attribute__ ((__always_inline__))
24344 vst1q_u64 (uint64_t *a, uint64x2_t b)
24346 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
24347 (int64x2_t) b);
24350 /* vstn */
24352 __extension__ static __inline void
24353 vst2_s64 (int64_t * __a, int64x1x2_t val)
24355 __builtin_aarch64_simd_oi __o;
24356 int64x2x2_t temp;
24357 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24358 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24359 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24360 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24361 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24364 __extension__ static __inline void
24365 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24367 __builtin_aarch64_simd_oi __o;
24368 uint64x2x2_t temp;
24369 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24370 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24371 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24372 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24373 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24376 __extension__ static __inline void
24377 vst2_f64 (float64_t * __a, float64x1x2_t val)
24379 __builtin_aarch64_simd_oi __o;
24380 float64x2x2_t temp;
24381 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24382 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24383 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24384 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24385 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24388 __extension__ static __inline void
24389 vst2_s8 (int8_t * __a, int8x8x2_t val)
24391 __builtin_aarch64_simd_oi __o;
24392 int8x16x2_t temp;
24393 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24394 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24395 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24396 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24397 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24400 __extension__ static __inline void __attribute__ ((__always_inline__))
24401 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24403 __builtin_aarch64_simd_oi __o;
24404 poly8x16x2_t temp;
24405 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24406 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24407 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24408 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24409 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24412 __extension__ static __inline void __attribute__ ((__always_inline__))
24413 vst2_s16 (int16_t * __a, int16x4x2_t val)
24415 __builtin_aarch64_simd_oi __o;
24416 int16x8x2_t temp;
24417 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24418 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24419 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24420 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24421 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24424 __extension__ static __inline void __attribute__ ((__always_inline__))
24425 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24427 __builtin_aarch64_simd_oi __o;
24428 poly16x8x2_t temp;
24429 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24430 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24431 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24432 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24433 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24436 __extension__ static __inline void __attribute__ ((__always_inline__))
24437 vst2_s32 (int32_t * __a, int32x2x2_t val)
24439 __builtin_aarch64_simd_oi __o;
24440 int32x4x2_t temp;
24441 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24442 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24443 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24444 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24445 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24448 __extension__ static __inline void __attribute__ ((__always_inline__))
24449 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24451 __builtin_aarch64_simd_oi __o;
24452 uint8x16x2_t temp;
24453 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24454 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24455 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24456 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24457 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24460 __extension__ static __inline void __attribute__ ((__always_inline__))
24461 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24463 __builtin_aarch64_simd_oi __o;
24464 uint16x8x2_t temp;
24465 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24466 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24467 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24468 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24469 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24472 __extension__ static __inline void __attribute__ ((__always_inline__))
24473 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24475 __builtin_aarch64_simd_oi __o;
24476 uint32x4x2_t temp;
24477 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24478 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24479 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24480 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24481 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24484 __extension__ static __inline void __attribute__ ((__always_inline__))
24485 vst2_f32 (float32_t * __a, float32x2x2_t val)
24487 __builtin_aarch64_simd_oi __o;
24488 float32x4x2_t temp;
24489 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24490 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24491 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24492 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24493 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24496 __extension__ static __inline void __attribute__ ((__always_inline__))
24497 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24499 __builtin_aarch64_simd_oi __o;
24500 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24501 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24502 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24505 __extension__ static __inline void __attribute__ ((__always_inline__))
24506 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24508 __builtin_aarch64_simd_oi __o;
24509 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24510 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24511 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24514 __extension__ static __inline void __attribute__ ((__always_inline__))
24515 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24517 __builtin_aarch64_simd_oi __o;
24518 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24519 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24520 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24523 __extension__ static __inline void __attribute__ ((__always_inline__))
24524 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24526 __builtin_aarch64_simd_oi __o;
24527 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24528 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24529 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24532 __extension__ static __inline void __attribute__ ((__always_inline__))
24533 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24535 __builtin_aarch64_simd_oi __o;
24536 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24537 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24538 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24541 __extension__ static __inline void __attribute__ ((__always_inline__))
24542 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24544 __builtin_aarch64_simd_oi __o;
24545 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24546 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24547 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24550 __extension__ static __inline void __attribute__ ((__always_inline__))
24551 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24553 __builtin_aarch64_simd_oi __o;
24554 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24555 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24556 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24559 __extension__ static __inline void __attribute__ ((__always_inline__))
24560 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24562 __builtin_aarch64_simd_oi __o;
24563 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24564 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24565 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24568 __extension__ static __inline void __attribute__ ((__always_inline__))
24569 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24571 __builtin_aarch64_simd_oi __o;
24572 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24573 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24574 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24577 __extension__ static __inline void __attribute__ ((__always_inline__))
24578 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24580 __builtin_aarch64_simd_oi __o;
24581 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24582 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24583 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24586 __extension__ static __inline void __attribute__ ((__always_inline__))
24587 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24589 __builtin_aarch64_simd_oi __o;
24590 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24591 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24592 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24595 __extension__ static __inline void __attribute__ ((__always_inline__))
24596 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24598 __builtin_aarch64_simd_oi __o;
24599 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24600 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24601 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24604 __extension__ static __inline void
24605 vst3_s64 (int64_t * __a, int64x1x3_t val)
24607 __builtin_aarch64_simd_ci __o;
24608 int64x2x3_t temp;
24609 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24610 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24611 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24612 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24613 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24614 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24615 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24618 __extension__ static __inline void
24619 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24621 __builtin_aarch64_simd_ci __o;
24622 uint64x2x3_t temp;
24623 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24624 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24625 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24626 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24627 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24628 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24629 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24632 __extension__ static __inline void
24633 vst3_f64 (float64_t * __a, float64x1x3_t val)
24635 __builtin_aarch64_simd_ci __o;
24636 float64x2x3_t temp;
24637 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24638 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24639 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24640 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24641 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24642 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24643 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24646 __extension__ static __inline void
24647 vst3_s8 (int8_t * __a, int8x8x3_t val)
24649 __builtin_aarch64_simd_ci __o;
24650 int8x16x3_t temp;
24651 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24652 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24653 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24654 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24655 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24656 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24657 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24660 __extension__ static __inline void __attribute__ ((__always_inline__))
24661 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24663 __builtin_aarch64_simd_ci __o;
24664 poly8x16x3_t temp;
24665 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24666 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24667 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24668 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24669 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24670 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24671 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24674 __extension__ static __inline void __attribute__ ((__always_inline__))
24675 vst3_s16 (int16_t * __a, int16x4x3_t val)
24677 __builtin_aarch64_simd_ci __o;
24678 int16x8x3_t temp;
24679 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24680 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24681 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24682 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24683 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24684 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24685 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24688 __extension__ static __inline void __attribute__ ((__always_inline__))
24689 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24691 __builtin_aarch64_simd_ci __o;
24692 poly16x8x3_t temp;
24693 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24694 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24695 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24696 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24697 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24698 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24699 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24702 __extension__ static __inline void __attribute__ ((__always_inline__))
24703 vst3_s32 (int32_t * __a, int32x2x3_t val)
24705 __builtin_aarch64_simd_ci __o;
24706 int32x4x3_t temp;
24707 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24708 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24709 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24710 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24711 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24712 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24713 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24716 __extension__ static __inline void __attribute__ ((__always_inline__))
24717 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24719 __builtin_aarch64_simd_ci __o;
24720 uint8x16x3_t temp;
24721 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24722 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24723 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24724 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24725 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24726 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24727 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24730 __extension__ static __inline void __attribute__ ((__always_inline__))
24731 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24733 __builtin_aarch64_simd_ci __o;
24734 uint16x8x3_t temp;
24735 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24736 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24737 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24738 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24739 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24740 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24741 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24744 __extension__ static __inline void __attribute__ ((__always_inline__))
24745 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24747 __builtin_aarch64_simd_ci __o;
24748 uint32x4x3_t temp;
24749 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24750 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24751 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24752 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24753 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24754 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24755 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24758 __extension__ static __inline void __attribute__ ((__always_inline__))
24759 vst3_f32 (float32_t * __a, float32x2x3_t val)
24761 __builtin_aarch64_simd_ci __o;
24762 float32x4x3_t temp;
24763 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24764 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24765 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24766 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24767 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24768 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24769 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24772 __extension__ static __inline void __attribute__ ((__always_inline__))
24773 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24775 __builtin_aarch64_simd_ci __o;
24776 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24777 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24778 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24779 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24782 __extension__ static __inline void __attribute__ ((__always_inline__))
24783 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24785 __builtin_aarch64_simd_ci __o;
24786 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24787 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24788 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24789 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24792 __extension__ static __inline void __attribute__ ((__always_inline__))
24793 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24795 __builtin_aarch64_simd_ci __o;
24796 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24797 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24798 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24799 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24802 __extension__ static __inline void __attribute__ ((__always_inline__))
24803 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24805 __builtin_aarch64_simd_ci __o;
24806 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24807 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24808 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24809 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24812 __extension__ static __inline void __attribute__ ((__always_inline__))
24813 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24815 __builtin_aarch64_simd_ci __o;
24816 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24817 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24818 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24819 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24822 __extension__ static __inline void __attribute__ ((__always_inline__))
24823 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24825 __builtin_aarch64_simd_ci __o;
24826 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24827 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24828 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24829 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24832 __extension__ static __inline void __attribute__ ((__always_inline__))
24833 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24835 __builtin_aarch64_simd_ci __o;
24836 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24837 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24838 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24839 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24842 __extension__ static __inline void __attribute__ ((__always_inline__))
24843 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24845 __builtin_aarch64_simd_ci __o;
24846 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24847 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24848 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24849 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24852 __extension__ static __inline void __attribute__ ((__always_inline__))
24853 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24855 __builtin_aarch64_simd_ci __o;
24856 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24857 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24858 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24859 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24862 __extension__ static __inline void __attribute__ ((__always_inline__))
24863 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24865 __builtin_aarch64_simd_ci __o;
24866 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24867 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24868 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24869 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24872 __extension__ static __inline void __attribute__ ((__always_inline__))
24873 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24875 __builtin_aarch64_simd_ci __o;
24876 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24877 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24878 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24879 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24882 __extension__ static __inline void __attribute__ ((__always_inline__))
24883 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24885 __builtin_aarch64_simd_ci __o;
24886 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24887 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24888 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24889 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24892 __extension__ static __inline void
24893 vst4_s64 (int64_t * __a, int64x1x4_t val)
24895 __builtin_aarch64_simd_xi __o;
24896 int64x2x4_t temp;
24897 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24898 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24899 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24900 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
24901 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24902 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24903 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24904 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24905 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24908 __extension__ static __inline void
24909 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24911 __builtin_aarch64_simd_xi __o;
24912 uint64x2x4_t temp;
24913 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24914 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24915 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24916 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
24917 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24918 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24919 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24920 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24921 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24924 __extension__ static __inline void
24925 vst4_f64 (float64_t * __a, float64x1x4_t val)
24927 __builtin_aarch64_simd_xi __o;
24928 float64x2x4_t temp;
24929 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24930 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24931 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24932 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
24933 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24934 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24935 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24936 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24937 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24940 __extension__ static __inline void
24941 vst4_s8 (int8_t * __a, int8x8x4_t val)
24943 __builtin_aarch64_simd_xi __o;
24944 int8x16x4_t temp;
24945 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24946 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24947 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24948 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
24949 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24950 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24951 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24952 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24953 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24956 __extension__ static __inline void __attribute__ ((__always_inline__))
24957 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24959 __builtin_aarch64_simd_xi __o;
24960 poly8x16x4_t temp;
24961 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24962 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24963 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24964 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
24965 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24966 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24967 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24968 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24969 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24972 __extension__ static __inline void __attribute__ ((__always_inline__))
24973 vst4_s16 (int16_t * __a, int16x4x4_t val)
24975 __builtin_aarch64_simd_xi __o;
24976 int16x8x4_t temp;
24977 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24978 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24979 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24980 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
24981 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24982 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24983 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24984 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24985 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24988 __extension__ static __inline void __attribute__ ((__always_inline__))
24989 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
24991 __builtin_aarch64_simd_xi __o;
24992 poly16x8x4_t temp;
24993 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24994 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24995 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24996 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
24997 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24998 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24999 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
25000 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
25001 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
25004 __extension__ static __inline void __attribute__ ((__always_inline__))
25005 vst4_s32 (int32_t * __a, int32x2x4_t val)
25007 __builtin_aarch64_simd_xi __o;
25008 int32x4x4_t temp;
25009 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
25010 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
25011 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
25012 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
25013 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
25014 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
25015 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
25016 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
25017 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
25020 __extension__ static __inline void __attribute__ ((__always_inline__))
25021 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
25023 __builtin_aarch64_simd_xi __o;
25024 uint8x16x4_t temp;
25025 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
25026 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
25027 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
25028 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
25029 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
25030 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
25031 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
25032 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
25033 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
25036 __extension__ static __inline void __attribute__ ((__always_inline__))
25037 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
25039 __builtin_aarch64_simd_xi __o;
25040 uint16x8x4_t temp;
25041 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
25042 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
25043 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
25044 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
25045 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
25046 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
25047 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
25048 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
25049 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
25052 __extension__ static __inline void __attribute__ ((__always_inline__))
25053 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
25055 __builtin_aarch64_simd_xi __o;
25056 uint32x4x4_t temp;
25057 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
25058 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
25059 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
25060 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
25061 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
25062 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
25063 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
25064 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
25065 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
25068 __extension__ static __inline void __attribute__ ((__always_inline__))
25069 vst4_f32 (float32_t * __a, float32x2x4_t val)
25071 __builtin_aarch64_simd_xi __o;
25072 float32x4x4_t temp;
25073 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
25074 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
25075 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
25076 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
25077 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
25078 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
25079 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
25080 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
25081 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
25084 __extension__ static __inline void __attribute__ ((__always_inline__))
25085 vst4q_s8 (int8_t * __a, int8x16x4_t val)
25087 __builtin_aarch64_simd_xi __o;
25088 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25089 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25090 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25091 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25092 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25095 __extension__ static __inline void __attribute__ ((__always_inline__))
25096 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
25098 __builtin_aarch64_simd_xi __o;
25099 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25100 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25101 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25102 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25103 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25106 __extension__ static __inline void __attribute__ ((__always_inline__))
25107 vst4q_s16 (int16_t * __a, int16x8x4_t val)
25109 __builtin_aarch64_simd_xi __o;
25110 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25111 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25112 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25113 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25114 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25117 __extension__ static __inline void __attribute__ ((__always_inline__))
25118 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
25120 __builtin_aarch64_simd_xi __o;
25121 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25122 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25123 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25124 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25125 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25128 __extension__ static __inline void __attribute__ ((__always_inline__))
25129 vst4q_s32 (int32_t * __a, int32x4x4_t val)
25131 __builtin_aarch64_simd_xi __o;
25132 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25133 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25134 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25135 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25136 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25139 __extension__ static __inline void __attribute__ ((__always_inline__))
25140 vst4q_s64 (int64_t * __a, int64x2x4_t val)
25142 __builtin_aarch64_simd_xi __o;
25143 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25144 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25145 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25146 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25147 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25150 __extension__ static __inline void __attribute__ ((__always_inline__))
25151 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
25153 __builtin_aarch64_simd_xi __o;
25154 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25155 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25156 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25157 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25158 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25161 __extension__ static __inline void __attribute__ ((__always_inline__))
25162 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
25164 __builtin_aarch64_simd_xi __o;
25165 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25166 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25167 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25168 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25169 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25172 __extension__ static __inline void __attribute__ ((__always_inline__))
25173 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
25175 __builtin_aarch64_simd_xi __o;
25176 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25177 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25178 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25179 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25180 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25183 __extension__ static __inline void __attribute__ ((__always_inline__))
25184 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
25186 __builtin_aarch64_simd_xi __o;
25187 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25188 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25189 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25190 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25191 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25194 __extension__ static __inline void __attribute__ ((__always_inline__))
25195 vst4q_f32 (float32_t * __a, float32x4x4_t val)
25197 __builtin_aarch64_simd_xi __o;
25198 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
25199 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
25200 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
25201 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
25202 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
25205 __extension__ static __inline void __attribute__ ((__always_inline__))
25206 vst4q_f64 (float64_t * __a, float64x2x4_t val)
25208 __builtin_aarch64_simd_xi __o;
25209 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
25210 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
25211 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
25212 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
25213 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
25216 /* vsub */
25218 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25219 vsubd_s64 (int64x1_t __a, int64x1_t __b)
25221 return __a - __b;
25224 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25225 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
25227 return __a - __b;
25230 /* vtrn */
25232 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
25233 vtrn_f32 (float32x2_t a, float32x2_t b)
25235 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
25238 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
25239 vtrn_p8 (poly8x8_t a, poly8x8_t b)
25241 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
25244 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
25245 vtrn_p16 (poly16x4_t a, poly16x4_t b)
25247 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
25250 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
25251 vtrn_s8 (int8x8_t a, int8x8_t b)
25253 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25256 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
25257 vtrn_s16 (int16x4_t a, int16x4_t b)
25259 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25262 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
25263 vtrn_s32 (int32x2_t a, int32x2_t b)
25265 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25268 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
25269 vtrn_u8 (uint8x8_t a, uint8x8_t b)
25271 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25274 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
25275 vtrn_u16 (uint16x4_t a, uint16x4_t b)
25277 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25280 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
25281 vtrn_u32 (uint32x2_t a, uint32x2_t b)
25283 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25286 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
25287 vtrnq_f32 (float32x4_t a, float32x4_t b)
25289 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25292 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
25293 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25295 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25298 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
25299 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25301 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25304 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
25305 vtrnq_s8 (int8x16_t a, int8x16_t b)
25307 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25310 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
25311 vtrnq_s16 (int16x8_t a, int16x8_t b)
25313 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25316 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25317 vtrnq_s32 (int32x4_t a, int32x4_t b)
25319 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25322 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25323 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25325 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25328 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25329 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25331 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25334 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25335 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25337 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25340 /* vtst */
25342 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25343 vtst_s8 (int8x8_t __a, int8x8_t __b)
25345 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25348 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25349 vtst_s16 (int16x4_t __a, int16x4_t __b)
25351 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25354 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25355 vtst_s32 (int32x2_t __a, int32x2_t __b)
25357 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25360 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25361 vtst_s64 (int64x1_t __a, int64x1_t __b)
25363 return (__a & __b) ? -1ll : 0ll;
25366 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25367 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25369 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25370 (int8x8_t) __b);
25373 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25374 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25376 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25377 (int16x4_t) __b);
25380 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25381 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25383 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25384 (int32x2_t) __b);
25387 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25388 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25390 return (__a & __b) ? -1ll : 0ll;
25393 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25394 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25396 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25399 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25400 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25402 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25405 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25406 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25408 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25411 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25412 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25414 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25417 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25418 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25420 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25421 (int8x16_t) __b);
25424 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25425 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25427 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25428 (int16x8_t) __b);
25431 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25432 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25434 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25435 (int32x4_t) __b);
25438 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25439 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25441 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25442 (int64x2_t) __b);
25445 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25446 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25448 return (__a & __b) ? -1ll : 0ll;
25451 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25452 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25454 return (__a & __b) ? -1ll : 0ll;
25457 /* vuqadd */
25459 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25460 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25462 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25465 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25466 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25468 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25471 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25472 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25474 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25477 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25478 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25480 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25483 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25484 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25486 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25489 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25490 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25492 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25495 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25496 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25498 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25501 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25502 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25504 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25507 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
25508 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25510 return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25513 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
25514 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25516 return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25519 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
25520 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25522 return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25525 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25526 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25528 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
/* Emit one interleave wrapper v<op><Q>_<funcsuffix> (a, b) whose
   result pairs the v<op>1 and v<op>2 halves: element [0] of the
   returned struct comes from v<op>1<Q>_<funcsuffix>, element [1]
   from v<op>2<Q>_<funcsuffix>.  Q is empty for 64-bit forms and
   "q" for 128-bit forms.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)		\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)			\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }
/* Instantiate __DEFINTERLEAVE for every vector shape the interleave
   operations support: all 64-bit forms first, then the 128-bit ("q")
   forms.  */
#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
/* vuzp */

/* Generate the full vuzp/vuzpq family from the template above.  */
__INTERLEAVE_LIST (uzp)

/* vzip */

/* Generate the full vzip/vzipq family from the template above.  */
__INTERLEAVE_LIST (zip)

/* The helper macros are implementation details; remove them so they
   do not leak into user code.  */
#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE
/* End of optimal implementations in approved order.  */

/* Tear down the private helper macros defined at the top of this
   header (__aarch64_vget_lane_*, __aarch64_vdup_lane*_*) so they do
   not leak into the user's namespace.  */

/* Lane-extract helpers, 64-bit vectors.  */
#undef __aarch64_vget_lane_any
#undef __aarch64_vget_lane_f32
#undef __aarch64_vget_lane_f64
#undef __aarch64_vget_lane_p8
#undef __aarch64_vget_lane_p16
#undef __aarch64_vget_lane_s8
#undef __aarch64_vget_lane_s16
#undef __aarch64_vget_lane_s32
#undef __aarch64_vget_lane_s64
#undef __aarch64_vget_lane_u8
#undef __aarch64_vget_lane_u16
#undef __aarch64_vget_lane_u32
#undef __aarch64_vget_lane_u64

/* Lane-extract helpers, 128-bit vectors.  */
#undef __aarch64_vgetq_lane_f32
#undef __aarch64_vgetq_lane_f64
#undef __aarch64_vgetq_lane_p8
#undef __aarch64_vgetq_lane_p16
#undef __aarch64_vgetq_lane_s8
#undef __aarch64_vgetq_lane_s16
#undef __aarch64_vgetq_lane_s32
#undef __aarch64_vgetq_lane_s64
#undef __aarch64_vgetq_lane_u8
#undef __aarch64_vgetq_lane_u16
#undef __aarch64_vgetq_lane_u32
#undef __aarch64_vgetq_lane_u64

/* Lane-duplicate helpers: 64-bit result from 64-bit source...  */
#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
/* ...64-bit result from 128-bit source...  */
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
/* ...128-bit result from 64-bit source...  */
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
/* ...and 128-bit result from 128-bit source.  */
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#endif