2013-10-22 Jan-Benedict Glaw <jbglaw@lug-owl.de>
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob15d1ed96584c2cac586e5960d05de4491a5ca968
1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2011-2013 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
#ifndef _AARCH64_NEON_H_
#define _AARCH64_NEON_H_

#include <stdint.h>

/* Helpers to build 64-bit integer constants of the exact scalar types
   used by the 64x1 "vector" typedefs below.  */
#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)
/* 64-bit ("D" register) vector types.  The element types are GCC
   builtin machine modes; __vector_size__ (8) widens each to a full
   8-byte NEON vector.  Single-element (64x1) types are represented as
   plain scalars rather than vectors.  */
typedef __builtin_aarch64_simd_qi int8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_hi int16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_si int32x2_t
  __attribute__ ((__vector_size__ (8)));
typedef int64_t int64x1_t;
typedef int32_t int32x1_t;
typedef int16_t int16x1_t;
typedef int8_t int8x1_t;
typedef double float64x1_t;
typedef __builtin_aarch64_simd_sf float32x2_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly8 poly8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly16 poly16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uqi uint8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uhi uint16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_usi uint32x2_t
  __attribute__ ((__vector_size__ (8)));
typedef uint64_t uint64x1_t;
typedef uint32_t uint32x1_t;
typedef uint16_t uint16x1_t;
typedef uint8_t uint8x1_t;
/* 128-bit ("Q" register) vector types: __vector_size__ (16) gives each
   element mode a full 16-byte NEON vector.  */
typedef __builtin_aarch64_simd_qi int8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_hi int16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_si int32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_di int64x2_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_sf float32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_df float64x2_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly8 poly8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly16 poly16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uqi uint8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uhi uint16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_usi uint32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_udi uint64x2_t
  __attribute__ ((__vector_size__ (16)));

/* Scalar element types used in intrinsic signatures.  */
typedef float float32_t;
typedef double float64_t;
typedef __builtin_aarch64_simd_poly8 poly8_t;
typedef __builtin_aarch64_simd_poly16 poly16_t;
/* Two-vector aggregate types, as returned/consumed by the paired
   load/store style intrinsics (e.g. vld2/vst2): an array of two
   vectors of the same type.  */
typedef struct int8x8x2_t
{
  int8x8_t val[2];
} int8x8x2_t;

typedef struct int8x16x2_t
{
  int8x16_t val[2];
} int8x16x2_t;

typedef struct int16x4x2_t
{
  int16x4_t val[2];
} int16x4x2_t;

typedef struct int16x8x2_t
{
  int16x8_t val[2];
} int16x8x2_t;

typedef struct int32x2x2_t
{
  int32x2_t val[2];
} int32x2x2_t;

typedef struct int32x4x2_t
{
  int32x4_t val[2];
} int32x4x2_t;

typedef struct int64x1x2_t
{
  int64x1_t val[2];
} int64x1x2_t;

typedef struct int64x2x2_t
{
  int64x2_t val[2];
} int64x2x2_t;

typedef struct uint8x8x2_t
{
  uint8x8_t val[2];
} uint8x8x2_t;

typedef struct uint8x16x2_t
{
  uint8x16_t val[2];
} uint8x16x2_t;

typedef struct uint16x4x2_t
{
  uint16x4_t val[2];
} uint16x4x2_t;

typedef struct uint16x8x2_t
{
  uint16x8_t val[2];
} uint16x8x2_t;

typedef struct uint32x2x2_t
{
  uint32x2_t val[2];
} uint32x2x2_t;

typedef struct uint32x4x2_t
{
  uint32x4_t val[2];
} uint32x4x2_t;

typedef struct uint64x1x2_t
{
  uint64x1_t val[2];
} uint64x1x2_t;

typedef struct uint64x2x2_t
{
  uint64x2_t val[2];
} uint64x2x2_t;

typedef struct float32x2x2_t
{
  float32x2_t val[2];
} float32x2x2_t;

typedef struct float32x4x2_t
{
  float32x4_t val[2];
} float32x4x2_t;

typedef struct float64x2x2_t
{
  float64x2_t val[2];
} float64x2x2_t;

typedef struct float64x1x2_t
{
  float64x1_t val[2];
} float64x1x2_t;

typedef struct poly8x8x2_t
{
  poly8x8_t val[2];
} poly8x8x2_t;

typedef struct poly8x16x2_t
{
  poly8x16_t val[2];
} poly8x16x2_t;

typedef struct poly16x4x2_t
{
  poly16x4_t val[2];
} poly16x4x2_t;

typedef struct poly16x8x2_t
{
  poly16x8_t val[2];
} poly16x8x2_t;
/* Three-vector aggregate types (e.g. for vld3/vst3).  */
typedef struct int8x8x3_t
{
  int8x8_t val[3];
} int8x8x3_t;

typedef struct int8x16x3_t
{
  int8x16_t val[3];
} int8x16x3_t;

typedef struct int16x4x3_t
{
  int16x4_t val[3];
} int16x4x3_t;

typedef struct int16x8x3_t
{
  int16x8_t val[3];
} int16x8x3_t;

typedef struct int32x2x3_t
{
  int32x2_t val[3];
} int32x2x3_t;

typedef struct int32x4x3_t
{
  int32x4_t val[3];
} int32x4x3_t;

typedef struct int64x1x3_t
{
  int64x1_t val[3];
} int64x1x3_t;

typedef struct int64x2x3_t
{
  int64x2_t val[3];
} int64x2x3_t;

typedef struct uint8x8x3_t
{
  uint8x8_t val[3];
} uint8x8x3_t;

typedef struct uint8x16x3_t
{
  uint8x16_t val[3];
} uint8x16x3_t;

typedef struct uint16x4x3_t
{
  uint16x4_t val[3];
} uint16x4x3_t;

typedef struct uint16x8x3_t
{
  uint16x8_t val[3];
} uint16x8x3_t;

typedef struct uint32x2x3_t
{
  uint32x2_t val[3];
} uint32x2x3_t;

typedef struct uint32x4x3_t
{
  uint32x4_t val[3];
} uint32x4x3_t;

typedef struct uint64x1x3_t
{
  uint64x1_t val[3];
} uint64x1x3_t;

typedef struct uint64x2x3_t
{
  uint64x2_t val[3];
} uint64x2x3_t;

typedef struct float32x2x3_t
{
  float32x2_t val[3];
} float32x2x3_t;

typedef struct float32x4x3_t
{
  float32x4_t val[3];
} float32x4x3_t;

typedef struct float64x2x3_t
{
  float64x2_t val[3];
} float64x2x3_t;

typedef struct float64x1x3_t
{
  float64x1_t val[3];
} float64x1x3_t;

typedef struct poly8x8x3_t
{
  poly8x8_t val[3];
} poly8x8x3_t;

typedef struct poly8x16x3_t
{
  poly8x16_t val[3];
} poly8x16x3_t;

typedef struct poly16x4x3_t
{
  poly16x4_t val[3];
} poly16x4x3_t;

typedef struct poly16x8x3_t
{
  poly16x8_t val[3];
} poly16x8x3_t;
/* Four-vector aggregate types (e.g. for vld4/vst4).  */
typedef struct int8x8x4_t
{
  int8x8_t val[4];
} int8x8x4_t;

typedef struct int8x16x4_t
{
  int8x16_t val[4];
} int8x16x4_t;

typedef struct int16x4x4_t
{
  int16x4_t val[4];
} int16x4x4_t;

typedef struct int16x8x4_t
{
  int16x8_t val[4];
} int16x8x4_t;

typedef struct int32x2x4_t
{
  int32x2_t val[4];
} int32x2x4_t;

typedef struct int32x4x4_t
{
  int32x4_t val[4];
} int32x4x4_t;

typedef struct int64x1x4_t
{
  int64x1_t val[4];
} int64x1x4_t;

typedef struct int64x2x4_t
{
  int64x2_t val[4];
} int64x2x4_t;

typedef struct uint8x8x4_t
{
  uint8x8_t val[4];
} uint8x8x4_t;

typedef struct uint8x16x4_t
{
  uint8x16_t val[4];
} uint8x16x4_t;

typedef struct uint16x4x4_t
{
  uint16x4_t val[4];
} uint16x4x4_t;

typedef struct uint16x8x4_t
{
  uint16x8_t val[4];
} uint16x8x4_t;

typedef struct uint32x2x4_t
{
  uint32x2_t val[4];
} uint32x2x4_t;

typedef struct uint32x4x4_t
{
  uint32x4_t val[4];
} uint32x4x4_t;

typedef struct uint64x1x4_t
{
  uint64x1_t val[4];
} uint64x1x4_t;

typedef struct uint64x2x4_t
{
  uint64x2_t val[4];
} uint64x2x4_t;

typedef struct float32x2x4_t
{
  float32x2_t val[4];
} float32x2x4_t;

typedef struct float32x4x4_t
{
  float32x4_t val[4];
} float32x4x4_t;

typedef struct float64x2x4_t
{
  float64x2_t val[4];
} float64x2x4_t;

typedef struct float64x1x4_t
{
  float64x1_t val[4];
} float64x1x4_t;

typedef struct poly8x8x4_t
{
  poly8x8_t val[4];
} poly8x8x4_t;

typedef struct poly8x16x4_t
{
  poly8x16_t val[4];
} poly8x16x4_t;

typedef struct poly16x4x4_t
{
  poly16x4_t val[4];
} poly16x4x4_t;

typedef struct poly16x8x4_t
{
  poly16x8_t val[4];
} poly16x8x4_t;
/* vget_lane internal macros. */

/* Extract lane __b from vector __a.  __size selects the machine-mode
   suffix of the builtin; __cast_ret/__cast_a adapt between the
   user-visible unsigned/poly types and the signed types the lane
   builtins are declared with (they may be empty).  */
#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret \
     __builtin_aarch64_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
/* 64x1 types are scalars, so "lane 0" is the value itself.  */
#define __aarch64_vget_lane_f64(__a, __b) (__a)

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) (__a)

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) (__a)

/* 128-bit (Q register) variants of the lane-extract macros.  */
#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros. */

/* Duplicate lane __b of __a across a new vector: extract the lane
   (__q2 selects the D/Q source variant) and broadcast it (__q1 selects
   the D/Q result variant).  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , , __a, __b)
/* 64x1 sources are scalars; duplicating their only lane is the identity.  */
#define __aarch64_vdup_lane_f64(__a, __b) (__a)
#define __aarch64_vdup_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) (__a)
#define __aarch64_vdup_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) (__a)
/* __aarch64_vdup_laneq internal macros. */

/* D-register result from a lane of a Q-register source.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , q, __a, __b)
/* __aarch64_vdupq_lane internal macros. */

/* Q-register result from a lane of a D-register source.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, , __a, __b)
/* 64x1 sources are scalars: broadcast the value directly.  */
#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
#define __aarch64_vdupq_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
#define __aarch64_vdupq_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))
/* __aarch64_vdupq_laneq internal macros. */

/* Q-register result from a lane of a Q-register source.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, q, __a, __b)
/* vadd */

/* Element-wise addition on 64-bit (D register) vectors.  GCC's generic
   vector extension implements '+' lane-wise, so no builtin is needed.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a + __b;
}

/* float64x1_t is a plain double, so this is scalar addition.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vadd_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a + __b;
}

/* 64x1 types are scalars as well.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}
/* Element-wise addition on 128-bit (Q register) vectors.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vaddq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vaddq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a + __b;
}
/* vaddl: long addition -- add two D-register vectors, producing a
   Q-register result with elements twice as wide (no overflow).  The
   unsigned variants cast through the signed types the builtins are
   declared with; the bit pattern is unchanged.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}
/* vaddl_high: long addition of the upper halves of two Q-register
   vectors, producing a widened Q-register result.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
/* vaddw: wide addition -- add a narrow D-register vector (widened
   element-wise) to an already-wide Q-register vector.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}
/* vaddw_high: wide addition using the upper half of the narrow
   Q-register operand.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
/* vhadd: halving addition on D-register vectors -- each result lane is
   (a + b) >> 1 computed without intermediate overflow.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}
/* vhaddq: halving addition on Q-register vectors.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}
/* vrhadd: rounding halving addition on D-register vectors -- each
   result lane is (a + b + 1) >> 1 without intermediate overflow.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}
/* vrhaddq: rounding halving addition on Q-register vectors.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
/* vaddhn: add and take the high (narrowing) half -- each result lane is
   the upper half of (a + b), halving the element width.  The unsigned
   variants reuse the same builtin; the bit pattern is identical.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}
/* vraddhn: rounding variant of vaddhn -- the narrowing extraction of
   the high half rounds instead of truncating.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}
/* vaddhn_high: narrowing high-half addition of __b and __c, with the
   result placed in the upper half of a Q register whose lower half is
   the existing narrow vector __a.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}
1165 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1166 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1168 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1171 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1172 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1174 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1177 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1178 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1180 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1183 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1184 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1186 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1187 (int16x8_t) __b,
1188 (int16x8_t) __c);
1191 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1192 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1194 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1195 (int32x4_t) __b,
1196 (int32x4_t) __c);
1199 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1200 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1202 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1203 (int64x2_t) __b,
1204 (int64x2_t) __c);
1207 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1208 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1210 return __a / __b;
1213 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1214 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1216 return __a / __b;
1219 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1220 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1222 return __a / __b;
1225 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1226 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1228 return __a / __b;
1231 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1232 vmul_s8 (int8x8_t __a, int8x8_t __b)
1234 return __a * __b;
1237 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1238 vmul_s16 (int16x4_t __a, int16x4_t __b)
1240 return __a * __b;
1243 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1244 vmul_s32 (int32x2_t __a, int32x2_t __b)
1246 return __a * __b;
1249 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1250 vmul_f32 (float32x2_t __a, float32x2_t __b)
1252 return __a * __b;
1255 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1256 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1258 return __a * __b;
1261 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1262 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1264 return __a * __b;
1267 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1268 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1270 return __a * __b;
1273 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1274 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1276 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1277 (int8x8_t) __b);
1280 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1281 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1283 return __a * __b;
1286 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1287 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1289 return __a * __b;
1292 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1293 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1295 return __a * __b;
1298 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1299 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1301 return __a * __b;
1304 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1305 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1307 return __a * __b;
1310 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1311 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1313 return __a * __b;
1316 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1317 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1319 return __a * __b;
1322 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1323 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1325 return __a * __b;
1328 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1329 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1331 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1332 (int8x16_t) __b);
1335 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1336 vand_s8 (int8x8_t __a, int8x8_t __b)
1338 return __a & __b;
1341 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1342 vand_s16 (int16x4_t __a, int16x4_t __b)
1344 return __a & __b;
1347 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1348 vand_s32 (int32x2_t __a, int32x2_t __b)
1350 return __a & __b;
1353 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1354 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1356 return __a & __b;
1359 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1360 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1362 return __a & __b;
1365 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1366 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1368 return __a & __b;
1371 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1372 vand_s64 (int64x1_t __a, int64x1_t __b)
1374 return __a & __b;
1377 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1378 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1380 return __a & __b;
1383 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1384 vandq_s8 (int8x16_t __a, int8x16_t __b)
1386 return __a & __b;
1389 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1390 vandq_s16 (int16x8_t __a, int16x8_t __b)
1392 return __a & __b;
1395 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1396 vandq_s32 (int32x4_t __a, int32x4_t __b)
1398 return __a & __b;
1401 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1402 vandq_s64 (int64x2_t __a, int64x2_t __b)
1404 return __a & __b;
1407 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1408 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1410 return __a & __b;
1413 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1414 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1416 return __a & __b;
1419 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1420 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1422 return __a & __b;
1425 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1426 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1428 return __a & __b;
1431 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1432 vorr_s8 (int8x8_t __a, int8x8_t __b)
1434 return __a | __b;
1437 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1438 vorr_s16 (int16x4_t __a, int16x4_t __b)
1440 return __a | __b;
1443 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1444 vorr_s32 (int32x2_t __a, int32x2_t __b)
1446 return __a | __b;
1449 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1450 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1452 return __a | __b;
1455 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1456 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1458 return __a | __b;
1461 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1462 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1464 return __a | __b;
1467 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1468 vorr_s64 (int64x1_t __a, int64x1_t __b)
1470 return __a | __b;
1473 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1474 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1476 return __a | __b;
1479 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1480 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1482 return __a | __b;
1485 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1486 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1488 return __a | __b;
1491 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1492 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1494 return __a | __b;
1497 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1498 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1500 return __a | __b;
1503 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1504 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1506 return __a | __b;
1509 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1510 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1512 return __a | __b;
1515 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1516 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1518 return __a | __b;
1521 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1522 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1524 return __a | __b;
1527 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1528 veor_s8 (int8x8_t __a, int8x8_t __b)
1530 return __a ^ __b;
1533 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1534 veor_s16 (int16x4_t __a, int16x4_t __b)
1536 return __a ^ __b;
1539 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1540 veor_s32 (int32x2_t __a, int32x2_t __b)
1542 return __a ^ __b;
1545 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1546 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1548 return __a ^ __b;
1551 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1552 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1554 return __a ^ __b;
1557 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1558 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1560 return __a ^ __b;
1563 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1564 veor_s64 (int64x1_t __a, int64x1_t __b)
1566 return __a ^ __b;
1569 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1570 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1572 return __a ^ __b;
1575 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1576 veorq_s8 (int8x16_t __a, int8x16_t __b)
1578 return __a ^ __b;
1581 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1582 veorq_s16 (int16x8_t __a, int16x8_t __b)
1584 return __a ^ __b;
1587 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1588 veorq_s32 (int32x4_t __a, int32x4_t __b)
1590 return __a ^ __b;
1593 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1594 veorq_s64 (int64x2_t __a, int64x2_t __b)
1596 return __a ^ __b;
1599 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1600 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1602 return __a ^ __b;
1605 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1606 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1608 return __a ^ __b;
1611 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1612 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1614 return __a ^ __b;
1617 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1618 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1620 return __a ^ __b;
1623 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1624 vbic_s8 (int8x8_t __a, int8x8_t __b)
1626 return __a & ~__b;
1629 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1630 vbic_s16 (int16x4_t __a, int16x4_t __b)
1632 return __a & ~__b;
1635 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1636 vbic_s32 (int32x2_t __a, int32x2_t __b)
1638 return __a & ~__b;
1641 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1642 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1644 return __a & ~__b;
1647 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1648 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1650 return __a & ~__b;
1653 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1654 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1656 return __a & ~__b;
1659 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1660 vbic_s64 (int64x1_t __a, int64x1_t __b)
1662 return __a & ~__b;
1665 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1666 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1668 return __a & ~__b;
1671 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1672 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1674 return __a & ~__b;
1677 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1678 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1680 return __a & ~__b;
1683 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1684 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1686 return __a & ~__b;
1689 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1690 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1692 return __a & ~__b;
1695 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1696 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1698 return __a & ~__b;
1701 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1702 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1704 return __a & ~__b;
1707 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1708 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1710 return __a & ~__b;
1713 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1714 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1716 return __a & ~__b;
1719 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1720 vorn_s8 (int8x8_t __a, int8x8_t __b)
1722 return __a | ~__b;
1725 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1726 vorn_s16 (int16x4_t __a, int16x4_t __b)
1728 return __a | ~__b;
1731 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1732 vorn_s32 (int32x2_t __a, int32x2_t __b)
1734 return __a | ~__b;
1737 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1738 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1740 return __a | ~__b;
1743 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1744 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1746 return __a | ~__b;
1749 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1750 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1752 return __a | ~__b;
1755 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1756 vorn_s64 (int64x1_t __a, int64x1_t __b)
1758 return __a | ~__b;
1761 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1762 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1764 return __a | ~__b;
1767 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1768 vornq_s8 (int8x16_t __a, int8x16_t __b)
1770 return __a | ~__b;
1773 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1774 vornq_s16 (int16x8_t __a, int16x8_t __b)
1776 return __a | ~__b;
1779 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1780 vornq_s32 (int32x4_t __a, int32x4_t __b)
1782 return __a | ~__b;
1785 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1786 vornq_s64 (int64x2_t __a, int64x2_t __b)
1788 return __a | ~__b;
1791 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1792 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1794 return __a | ~__b;
1797 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1798 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1800 return __a | ~__b;
1803 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1804 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1806 return __a | ~__b;
1809 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1810 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1812 return __a | ~__b;
1815 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1816 vsub_s8 (int8x8_t __a, int8x8_t __b)
1818 return __a - __b;
1821 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1822 vsub_s16 (int16x4_t __a, int16x4_t __b)
1824 return __a - __b;
1827 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1828 vsub_s32 (int32x2_t __a, int32x2_t __b)
1830 return __a - __b;
1833 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1834 vsub_f32 (float32x2_t __a, float32x2_t __b)
1836 return __a - __b;
1839 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1840 vsub_f64 (float64x1_t __a, float64x1_t __b)
1842 return __a - __b;
1845 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1846 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1848 return __a - __b;
1851 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1852 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1854 return __a - __b;
1857 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1858 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1860 return __a - __b;
1863 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1864 vsub_s64 (int64x1_t __a, int64x1_t __b)
1866 return __a - __b;
1869 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1870 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1872 return __a - __b;
1875 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1876 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1878 return __a - __b;
1881 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1882 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1884 return __a - __b;
1887 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1888 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1890 return __a - __b;
1893 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1894 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1896 return __a - __b;
1899 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1900 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1902 return __a - __b;
1905 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1906 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1908 return __a - __b;
1911 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1912 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1914 return __a - __b;
1917 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1918 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1920 return __a - __b;
1923 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1924 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1926 return __a - __b;
1929 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1930 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1932 return __a - __b;
1935 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1936 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1938 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1941 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1942 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1944 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1947 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1948 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1950 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1953 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1954 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1956 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1957 (int8x8_t) __b);
1960 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1961 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1963 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1964 (int16x4_t) __b);
1967 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1968 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1970 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1971 (int32x2_t) __b);
1974 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1975 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1977 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1980 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1981 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1983 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1986 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1987 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1989 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1992 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1993 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1995 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1996 (int8x16_t) __b);
1999 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2000 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2002 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2003 (int16x8_t) __b);
2006 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2007 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2009 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2010 (int32x4_t) __b);
2013 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2014 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2016 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2019 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2020 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2022 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2025 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2026 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2028 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2031 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2032 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2034 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2035 (int8x8_t) __b);
2038 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2039 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2041 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2042 (int16x4_t) __b);
2045 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2046 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2048 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2049 (int32x2_t) __b);
2052 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2053 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2055 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2058 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2059 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2061 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2064 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2065 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2067 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2071 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2073 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2074 (int8x16_t) __b);
2077 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2078 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2080 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2081 (int16x8_t) __b);
2084 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2085 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2087 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2088 (int32x4_t) __b);
2091 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2092 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2094 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2097 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2098 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2100 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2103 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2104 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2106 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2109 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2110 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2112 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
2115 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2116 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2118 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
2119 (int8x8_t) __b);
2122 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2123 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2125 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
2126 (int16x4_t) __b);
2129 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2130 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2132 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
2133 (int32x2_t) __b);
2136 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2137 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2139 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
2140 (int64x1_t) __b);
2143 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2144 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2146 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2149 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2150 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2152 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2155 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2156 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2158 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2161 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2162 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2164 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2167 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2168 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2170 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
2171 (int8x16_t) __b);
2174 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2175 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2177 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
2178 (int16x8_t) __b);
2181 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2182 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2184 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2185 (int32x4_t) __b);
2188 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2189 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2191 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2192 (int64x2_t) __b);
2195 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2196 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2198 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2201 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2202 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2204 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2207 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2208 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2210 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2213 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2214 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2216 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2219 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2220 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2222 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2223 (int8x8_t) __b);
2226 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2227 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2229 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2230 (int16x4_t) __b);
2233 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2234 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2236 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2237 (int32x2_t) __b);
2240 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2241 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2243 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2244 (int64x1_t) __b);
2247 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2248 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2250 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2253 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2254 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2256 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2259 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2260 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2262 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2265 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2266 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2268 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2271 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2272 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2274 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2275 (int8x16_t) __b);
2278 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2279 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2281 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2282 (int16x8_t) __b);
2285 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2286 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2288 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2289 (int32x4_t) __b);
2292 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2293 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2295 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2296 (int64x2_t) __b);
2299 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2300 vqneg_s8 (int8x8_t __a)
2302 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2305 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2306 vqneg_s16 (int16x4_t __a)
2308 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2311 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2312 vqneg_s32 (int32x2_t __a)
2314 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2317 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2318 vqnegq_s8 (int8x16_t __a)
2320 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2323 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2324 vqnegq_s16 (int16x8_t __a)
2326 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2329 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2330 vqnegq_s32 (int32x4_t __a)
2332 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2335 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2336 vqabs_s8 (int8x8_t __a)
2338 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2341 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2342 vqabs_s16 (int16x4_t __a)
2344 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2347 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2348 vqabs_s32 (int32x2_t __a)
2350 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2353 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2354 vqabsq_s8 (int8x16_t __a)
2356 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2359 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2360 vqabsq_s16 (int16x8_t __a)
2362 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2365 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2366 vqabsq_s32 (int32x4_t __a)
2368 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2371 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2372 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2374 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2377 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2378 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2380 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2383 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2384 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2386 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2389 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2390 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2392 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2395 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2396 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2398 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2401 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2402 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2404 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2407 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2408 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2410 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2413 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2414 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2416 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2419 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2420 vcreate_s8 (uint64_t __a)
2422 return (int8x8_t) __a;
2425 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2426 vcreate_s16 (uint64_t __a)
2428 return (int16x4_t) __a;
2431 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2432 vcreate_s32 (uint64_t __a)
2434 return (int32x2_t) __a;
2437 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2438 vcreate_s64 (uint64_t __a)
2440 return (int64x1_t) __a;
2443 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2444 vcreate_f32 (uint64_t __a)
2446 return (float32x2_t) __a;
2449 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2450 vcreate_u8 (uint64_t __a)
2452 return (uint8x8_t) __a;
2455 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2456 vcreate_u16 (uint64_t __a)
2458 return (uint16x4_t) __a;
2461 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2462 vcreate_u32 (uint64_t __a)
2464 return (uint32x2_t) __a;
2467 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2468 vcreate_u64 (uint64_t __a)
2470 return (uint64x1_t) __a;
2473 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2474 vcreate_f64 (uint64_t __a)
2476 return (float64x1_t) __builtin_aarch64_createdf (__a);
2479 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2480 vcreate_p8 (uint64_t __a)
2482 return (poly8x8_t) __a;
2485 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2486 vcreate_p16 (uint64_t __a)
2488 return (poly16x4_t) __a;
2491 /* vget_lane */
2493 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2494 vget_lane_f32 (float32x2_t __a, const int __b)
2496 return __aarch64_vget_lane_f32 (__a, __b);
2499 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2500 vget_lane_f64 (float64x1_t __a, const int __b)
2502 return __aarch64_vget_lane_f64 (__a, __b);
2505 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2506 vget_lane_p8 (poly8x8_t __a, const int __b)
2508 return __aarch64_vget_lane_p8 (__a, __b);
2511 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2512 vget_lane_p16 (poly16x4_t __a, const int __b)
2514 return __aarch64_vget_lane_p16 (__a, __b);
2517 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2518 vget_lane_s8 (int8x8_t __a, const int __b)
2520 return __aarch64_vget_lane_s8 (__a, __b);
2523 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2524 vget_lane_s16 (int16x4_t __a, const int __b)
2526 return __aarch64_vget_lane_s16 (__a, __b);
2529 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2530 vget_lane_s32 (int32x2_t __a, const int __b)
2532 return __aarch64_vget_lane_s32 (__a, __b);
2535 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2536 vget_lane_s64 (int64x1_t __a, const int __b)
2538 return __aarch64_vget_lane_s64 (__a, __b);
2541 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2542 vget_lane_u8 (uint8x8_t __a, const int __b)
2544 return __aarch64_vget_lane_u8 (__a, __b);
2547 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2548 vget_lane_u16 (uint16x4_t __a, const int __b)
2550 return __aarch64_vget_lane_u16 (__a, __b);
2553 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2554 vget_lane_u32 (uint32x2_t __a, const int __b)
2556 return __aarch64_vget_lane_u32 (__a, __b);
2559 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2560 vget_lane_u64 (uint64x1_t __a, const int __b)
2562 return __aarch64_vget_lane_u64 (__a, __b);
2565 /* vgetq_lane */
2567 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2568 vgetq_lane_f32 (float32x4_t __a, const int __b)
2570 return __aarch64_vgetq_lane_f32 (__a, __b);
2573 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2574 vgetq_lane_f64 (float64x2_t __a, const int __b)
2576 return __aarch64_vgetq_lane_f64 (__a, __b);
2579 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2580 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2582 return __aarch64_vgetq_lane_p8 (__a, __b);
2585 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2586 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2588 return __aarch64_vgetq_lane_p16 (__a, __b);
2591 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2592 vgetq_lane_s8 (int8x16_t __a, const int __b)
2594 return __aarch64_vgetq_lane_s8 (__a, __b);
2597 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2598 vgetq_lane_s16 (int16x8_t __a, const int __b)
2600 return __aarch64_vgetq_lane_s16 (__a, __b);
2603 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2604 vgetq_lane_s32 (int32x4_t __a, const int __b)
2606 return __aarch64_vgetq_lane_s32 (__a, __b);
2609 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2610 vgetq_lane_s64 (int64x2_t __a, const int __b)
2612 return __aarch64_vgetq_lane_s64 (__a, __b);
2615 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2616 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2618 return __aarch64_vgetq_lane_u8 (__a, __b);
2621 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2622 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2624 return __aarch64_vgetq_lane_u16 (__a, __b);
2627 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2628 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2630 return __aarch64_vgetq_lane_u32 (__a, __b);
2633 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2634 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2636 return __aarch64_vgetq_lane_u64 (__a, __b);
2639 /* vreinterpret */
2641 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2642 vreinterpret_p8_s8 (int8x8_t __a)
2644 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2647 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2648 vreinterpret_p8_s16 (int16x4_t __a)
2650 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2653 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2654 vreinterpret_p8_s32 (int32x2_t __a)
2656 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2659 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2660 vreinterpret_p8_s64 (int64x1_t __a)
2662 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2665 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2666 vreinterpret_p8_f32 (float32x2_t __a)
2668 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2671 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2672 vreinterpret_p8_u8 (uint8x8_t __a)
2674 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2677 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2678 vreinterpret_p8_u16 (uint16x4_t __a)
2680 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2683 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2684 vreinterpret_p8_u32 (uint32x2_t __a)
2686 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2689 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2690 vreinterpret_p8_u64 (uint64x1_t __a)
2692 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2695 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2696 vreinterpret_p8_p16 (poly16x4_t __a)
2698 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2701 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2702 vreinterpretq_p8_s8 (int8x16_t __a)
2704 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2707 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2708 vreinterpretq_p8_s16 (int16x8_t __a)
2710 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2713 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2714 vreinterpretq_p8_s32 (int32x4_t __a)
2716 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2719 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2720 vreinterpretq_p8_s64 (int64x2_t __a)
2722 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2725 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2726 vreinterpretq_p8_f32 (float32x4_t __a)
2728 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2731 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2732 vreinterpretq_p8_u8 (uint8x16_t __a)
2734 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2735 __a);
2738 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2739 vreinterpretq_p8_u16 (uint16x8_t __a)
2741 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2742 __a);
2745 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2746 vreinterpretq_p8_u32 (uint32x4_t __a)
2748 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2749 __a);
2752 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2753 vreinterpretq_p8_u64 (uint64x2_t __a)
2755 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2756 __a);
2759 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2760 vreinterpretq_p8_p16 (poly16x8_t __a)
2762 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2763 __a);
2766 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2767 vreinterpret_p16_s8 (int8x8_t __a)
2769 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2772 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2773 vreinterpret_p16_s16 (int16x4_t __a)
2775 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2778 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2779 vreinterpret_p16_s32 (int32x2_t __a)
2781 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2784 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2785 vreinterpret_p16_s64 (int64x1_t __a)
2787 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2790 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2791 vreinterpret_p16_f32 (float32x2_t __a)
2793 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2796 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2797 vreinterpret_p16_u8 (uint8x8_t __a)
2799 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2802 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2803 vreinterpret_p16_u16 (uint16x4_t __a)
2805 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2808 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2809 vreinterpret_p16_u32 (uint32x2_t __a)
2811 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2814 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2815 vreinterpret_p16_u64 (uint64x1_t __a)
2817 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2820 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2821 vreinterpret_p16_p8 (poly8x8_t __a)
2823 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2826 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2827 vreinterpretq_p16_s8 (int8x16_t __a)
2829 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2832 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2833 vreinterpretq_p16_s16 (int16x8_t __a)
2835 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2838 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2839 vreinterpretq_p16_s32 (int32x4_t __a)
2841 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2844 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2845 vreinterpretq_p16_s64 (int64x2_t __a)
2847 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2850 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2851 vreinterpretq_p16_f32 (float32x4_t __a)
2853 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2856 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2857 vreinterpretq_p16_u8 (uint8x16_t __a)
2859 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2860 __a);
2863 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2864 vreinterpretq_p16_u16 (uint16x8_t __a)
2866 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2869 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2870 vreinterpretq_p16_u32 (uint32x4_t __a)
2872 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2875 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2876 vreinterpretq_p16_u64 (uint64x2_t __a)
2878 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2881 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2882 vreinterpretq_p16_p8 (poly8x16_t __a)
2884 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2885 __a);
2888 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2889 vreinterpret_f32_s8 (int8x8_t __a)
2891 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2894 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2895 vreinterpret_f32_s16 (int16x4_t __a)
2897 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2900 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2901 vreinterpret_f32_s32 (int32x2_t __a)
2903 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2906 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2907 vreinterpret_f32_s64 (int64x1_t __a)
2909 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2912 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2913 vreinterpret_f32_u8 (uint8x8_t __a)
2915 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2918 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2919 vreinterpret_f32_u16 (uint16x4_t __a)
2921 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2922 __a);
2925 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2926 vreinterpret_f32_u32 (uint32x2_t __a)
2928 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2929 __a);
2932 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2933 vreinterpret_f32_u64 (uint64x1_t __a)
2935 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2938 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2939 vreinterpret_f32_p8 (poly8x8_t __a)
2941 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2944 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2945 vreinterpret_f32_p16 (poly16x4_t __a)
2947 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2948 __a);
2951 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2952 vreinterpretq_f32_s8 (int8x16_t __a)
2954 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2957 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2958 vreinterpretq_f32_s16 (int16x8_t __a)
2960 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2963 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2964 vreinterpretq_f32_s32 (int32x4_t __a)
2966 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2969 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2970 vreinterpretq_f32_s64 (int64x2_t __a)
2972 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2975 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2976 vreinterpretq_f32_u8 (uint8x16_t __a)
2978 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2979 __a);
2982 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2983 vreinterpretq_f32_u16 (uint16x8_t __a)
2985 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2986 __a);
2989 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2990 vreinterpretq_f32_u32 (uint32x4_t __a)
2992 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2993 __a);
2996 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2997 vreinterpretq_f32_u64 (uint64x2_t __a)
2999 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
3000 __a);
3003 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3004 vreinterpretq_f32_p8 (poly8x16_t __a)
3006 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
3007 __a);
3010 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3011 vreinterpretq_f32_p16 (poly16x8_t __a)
3013 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
3014 __a);
3017 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3018 vreinterpret_s64_s8 (int8x8_t __a)
3020 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3023 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3024 vreinterpret_s64_s16 (int16x4_t __a)
3026 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3029 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3030 vreinterpret_s64_s32 (int32x2_t __a)
3032 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3035 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3036 vreinterpret_s64_f32 (float32x2_t __a)
3038 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3041 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3042 vreinterpret_s64_u8 (uint8x8_t __a)
3044 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3047 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3048 vreinterpret_s64_u16 (uint16x4_t __a)
3050 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3053 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3054 vreinterpret_s64_u32 (uint32x2_t __a)
3056 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3059 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3060 vreinterpret_s64_u64 (uint64x1_t __a)
3062 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
3065 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3066 vreinterpret_s64_p8 (poly8x8_t __a)
3068 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3071 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3072 vreinterpret_s64_p16 (poly16x4_t __a)
3074 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3077 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3078 vreinterpretq_s64_s8 (int8x16_t __a)
3080 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3083 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3084 vreinterpretq_s64_s16 (int16x8_t __a)
3086 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3089 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3090 vreinterpretq_s64_s32 (int32x4_t __a)
3092 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3095 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3096 vreinterpretq_s64_f32 (float32x4_t __a)
3098 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3101 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3102 vreinterpretq_s64_u8 (uint8x16_t __a)
3104 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3107 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3108 vreinterpretq_s64_u16 (uint16x8_t __a)
3110 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3113 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3114 vreinterpretq_s64_u32 (uint32x4_t __a)
3116 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3119 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3120 vreinterpretq_s64_u64 (uint64x2_t __a)
3122 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
3125 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3126 vreinterpretq_s64_p8 (poly8x16_t __a)
3128 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3131 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3132 vreinterpretq_s64_p16 (poly16x8_t __a)
3134 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3137 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3138 vreinterpret_u64_s8 (int8x8_t __a)
3140 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3143 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3144 vreinterpret_u64_s16 (int16x4_t __a)
3146 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3149 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3150 vreinterpret_u64_s32 (int32x2_t __a)
3152 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3155 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3156 vreinterpret_u64_s64 (int64x1_t __a)
3158 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
3161 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3162 vreinterpret_u64_f32 (float32x2_t __a)
3164 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3167 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3168 vreinterpret_u64_u8 (uint8x8_t __a)
3170 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3173 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3174 vreinterpret_u64_u16 (uint16x4_t __a)
3176 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3179 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3180 vreinterpret_u64_u32 (uint32x2_t __a)
3182 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3185 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3186 vreinterpret_u64_p8 (poly8x8_t __a)
3188 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3191 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3192 vreinterpret_u64_p16 (poly16x4_t __a)
3194 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3197 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3198 vreinterpretq_u64_s8 (int8x16_t __a)
3200 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3203 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3204 vreinterpretq_u64_s16 (int16x8_t __a)
3206 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3209 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3210 vreinterpretq_u64_s32 (int32x4_t __a)
3212 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3215 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3216 vreinterpretq_u64_s64 (int64x2_t __a)
3218 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3221 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3222 vreinterpretq_u64_f32 (float32x4_t __a)
3224 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3227 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3228 vreinterpretq_u64_u8 (uint8x16_t __a)
3230 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3231 __a);
3234 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3235 vreinterpretq_u64_u16 (uint16x8_t __a)
3237 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3240 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3241 vreinterpretq_u64_u32 (uint32x4_t __a)
3243 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3246 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3247 vreinterpretq_u64_p8 (poly8x16_t __a)
3249 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3250 __a);
3253 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3254 vreinterpretq_u64_p16 (poly16x8_t __a)
3256 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3259 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3260 vreinterpret_s8_s16 (int16x4_t __a)
3262 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3265 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3266 vreinterpret_s8_s32 (int32x2_t __a)
3268 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3271 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3272 vreinterpret_s8_s64 (int64x1_t __a)
3274 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3277 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3278 vreinterpret_s8_f32 (float32x2_t __a)
3280 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3283 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3284 vreinterpret_s8_u8 (uint8x8_t __a)
3286 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3289 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3290 vreinterpret_s8_u16 (uint16x4_t __a)
3292 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3295 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3296 vreinterpret_s8_u32 (uint32x2_t __a)
3298 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3301 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3302 vreinterpret_s8_u64 (uint64x1_t __a)
3304 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3307 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3308 vreinterpret_s8_p8 (poly8x8_t __a)
3310 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3313 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3314 vreinterpret_s8_p16 (poly16x4_t __a)
3316 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3319 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3320 vreinterpretq_s8_s16 (int16x8_t __a)
3322 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3325 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3326 vreinterpretq_s8_s32 (int32x4_t __a)
3328 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3331 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3332 vreinterpretq_s8_s64 (int64x2_t __a)
3334 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3337 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3338 vreinterpretq_s8_f32 (float32x4_t __a)
3340 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3343 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3344 vreinterpretq_s8_u8 (uint8x16_t __a)
3346 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3347 __a);
3350 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3351 vreinterpretq_s8_u16 (uint16x8_t __a)
3353 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3356 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3357 vreinterpretq_s8_u32 (uint32x4_t __a)
3359 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3362 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3363 vreinterpretq_s8_u64 (uint64x2_t __a)
3365 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3368 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3369 vreinterpretq_s8_p8 (poly8x16_t __a)
3371 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3372 __a);
3375 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3376 vreinterpretq_s8_p16 (poly16x8_t __a)
3378 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3381 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3382 vreinterpret_s16_s8 (int8x8_t __a)
3384 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3387 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3388 vreinterpret_s16_s32 (int32x2_t __a)
3390 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3393 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3394 vreinterpret_s16_s64 (int64x1_t __a)
3396 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3399 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3400 vreinterpret_s16_f32 (float32x2_t __a)
3402 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3405 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3406 vreinterpret_s16_u8 (uint8x8_t __a)
3408 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3411 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3412 vreinterpret_s16_u16 (uint16x4_t __a)
3414 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3417 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3418 vreinterpret_s16_u32 (uint32x2_t __a)
3420 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3423 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3424 vreinterpret_s16_u64 (uint64x1_t __a)
3426 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3429 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3430 vreinterpret_s16_p8 (poly8x8_t __a)
3432 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3435 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3436 vreinterpret_s16_p16 (poly16x4_t __a)
3438 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3441 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3442 vreinterpretq_s16_s8 (int8x16_t __a)
3444 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3447 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3448 vreinterpretq_s16_s32 (int32x4_t __a)
3450 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3453 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3454 vreinterpretq_s16_s64 (int64x2_t __a)
3456 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3459 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3460 vreinterpretq_s16_f32 (float32x4_t __a)
3462 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3465 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3466 vreinterpretq_s16_u8 (uint8x16_t __a)
3468 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3471 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3472 vreinterpretq_s16_u16 (uint16x8_t __a)
3474 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3477 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3478 vreinterpretq_s16_u32 (uint32x4_t __a)
3480 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3483 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3484 vreinterpretq_s16_u64 (uint64x2_t __a)
3486 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3489 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3490 vreinterpretq_s16_p8 (poly8x16_t __a)
3492 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3495 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3496 vreinterpretq_s16_p16 (poly16x8_t __a)
3498 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3501 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3502 vreinterpret_s32_s8 (int8x8_t __a)
3504 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3507 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3508 vreinterpret_s32_s16 (int16x4_t __a)
3510 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3513 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3514 vreinterpret_s32_s64 (int64x1_t __a)
3516 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3519 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3520 vreinterpret_s32_f32 (float32x2_t __a)
3522 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3525 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3526 vreinterpret_s32_u8 (uint8x8_t __a)
3528 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3531 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3532 vreinterpret_s32_u16 (uint16x4_t __a)
3534 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3537 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3538 vreinterpret_s32_u32 (uint32x2_t __a)
3540 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3543 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3544 vreinterpret_s32_u64 (uint64x1_t __a)
3546 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3549 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3550 vreinterpret_s32_p8 (poly8x8_t __a)
3552 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3555 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3556 vreinterpret_s32_p16 (poly16x4_t __a)
3558 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3561 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3562 vreinterpretq_s32_s8 (int8x16_t __a)
3564 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3567 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3568 vreinterpretq_s32_s16 (int16x8_t __a)
3570 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3573 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3574 vreinterpretq_s32_s64 (int64x2_t __a)
3576 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3579 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3580 vreinterpretq_s32_f32 (float32x4_t __a)
3582 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3585 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3586 vreinterpretq_s32_u8 (uint8x16_t __a)
3588 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3591 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3592 vreinterpretq_s32_u16 (uint16x8_t __a)
3594 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3597 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3598 vreinterpretq_s32_u32 (uint32x4_t __a)
3600 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3603 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3604 vreinterpretq_s32_u64 (uint64x2_t __a)
3606 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3609 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3610 vreinterpretq_s32_p8 (poly8x16_t __a)
3612 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3615 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3616 vreinterpretq_s32_p16 (poly16x8_t __a)
3618 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3621 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3622 vreinterpret_u8_s8 (int8x8_t __a)
3624 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3627 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3628 vreinterpret_u8_s16 (int16x4_t __a)
3630 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3633 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3634 vreinterpret_u8_s32 (int32x2_t __a)
3636 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3639 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3640 vreinterpret_u8_s64 (int64x1_t __a)
3642 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3645 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3646 vreinterpret_u8_f32 (float32x2_t __a)
3648 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3651 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3652 vreinterpret_u8_u16 (uint16x4_t __a)
3654 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3657 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3658 vreinterpret_u8_u32 (uint32x2_t __a)
3660 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3663 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3664 vreinterpret_u8_u64 (uint64x1_t __a)
3666 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3669 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3670 vreinterpret_u8_p8 (poly8x8_t __a)
3672 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3675 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3676 vreinterpret_u8_p16 (poly16x4_t __a)
3678 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3681 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3682 vreinterpretq_u8_s8 (int8x16_t __a)
3684 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3687 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3688 vreinterpretq_u8_s16 (int16x8_t __a)
3690 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3693 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3694 vreinterpretq_u8_s32 (int32x4_t __a)
3696 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3699 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3700 vreinterpretq_u8_s64 (int64x2_t __a)
3702 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3705 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3706 vreinterpretq_u8_f32 (float32x4_t __a)
3708 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3711 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3712 vreinterpretq_u8_u16 (uint16x8_t __a)
3714 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3715 __a);
3718 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3719 vreinterpretq_u8_u32 (uint32x4_t __a)
3721 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3722 __a);
3725 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3726 vreinterpretq_u8_u64 (uint64x2_t __a)
3728 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3729 __a);
3732 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3733 vreinterpretq_u8_p8 (poly8x16_t __a)
3735 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3736 __a);
3739 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3740 vreinterpretq_u8_p16 (poly16x8_t __a)
3742 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3743 __a);
3746 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3747 vreinterpret_u16_s8 (int8x8_t __a)
3749 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3752 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3753 vreinterpret_u16_s16 (int16x4_t __a)
3755 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3758 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3759 vreinterpret_u16_s32 (int32x2_t __a)
3761 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3764 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3765 vreinterpret_u16_s64 (int64x1_t __a)
3767 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3770 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3771 vreinterpret_u16_f32 (float32x2_t __a)
3773 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3776 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3777 vreinterpret_u16_u8 (uint8x8_t __a)
3779 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3782 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3783 vreinterpret_u16_u32 (uint32x2_t __a)
3785 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3788 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3789 vreinterpret_u16_u64 (uint64x1_t __a)
3791 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3794 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3795 vreinterpret_u16_p8 (poly8x8_t __a)
3797 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3800 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3801 vreinterpret_u16_p16 (poly16x4_t __a)
3803 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3806 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3807 vreinterpretq_u16_s8 (int8x16_t __a)
3809 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3812 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3813 vreinterpretq_u16_s16 (int16x8_t __a)
3815 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3818 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3819 vreinterpretq_u16_s32 (int32x4_t __a)
3821 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3824 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3825 vreinterpretq_u16_s64 (int64x2_t __a)
3827 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3830 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3831 vreinterpretq_u16_f32 (float32x4_t __a)
3833 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3836 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3837 vreinterpretq_u16_u8 (uint8x16_t __a)
3839 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3840 __a);
3843 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3844 vreinterpretq_u16_u32 (uint32x4_t __a)
3846 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3849 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3850 vreinterpretq_u16_u64 (uint64x2_t __a)
3852 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3855 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3856 vreinterpretq_u16_p8 (poly8x16_t __a)
3858 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3859 __a);
3862 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3863 vreinterpretq_u16_p16 (poly16x8_t __a)
3865 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3868 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3869 vreinterpret_u32_s8 (int8x8_t __a)
3871 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3874 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3875 vreinterpret_u32_s16 (int16x4_t __a)
3877 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3880 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3881 vreinterpret_u32_s32 (int32x2_t __a)
3883 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3886 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3887 vreinterpret_u32_s64 (int64x1_t __a)
3889 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3892 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3893 vreinterpret_u32_f32 (float32x2_t __a)
3895 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3898 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3899 vreinterpret_u32_u8 (uint8x8_t __a)
3901 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3904 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3905 vreinterpret_u32_u16 (uint16x4_t __a)
3907 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3910 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3911 vreinterpret_u32_u64 (uint64x1_t __a)
3913 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3916 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3917 vreinterpret_u32_p8 (poly8x8_t __a)
3919 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3922 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3923 vreinterpret_u32_p16 (poly16x4_t __a)
3925 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3928 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3929 vreinterpretq_u32_s8 (int8x16_t __a)
3931 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3934 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3935 vreinterpretq_u32_s16 (int16x8_t __a)
3937 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3940 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3941 vreinterpretq_u32_s32 (int32x4_t __a)
3943 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3946 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3947 vreinterpretq_u32_s64 (int64x2_t __a)
3949 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3952 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3953 vreinterpretq_u32_f32 (float32x4_t __a)
3955 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3958 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3959 vreinterpretq_u32_u8 (uint8x16_t __a)
3961 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3962 __a);
3965 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3966 vreinterpretq_u32_u16 (uint16x8_t __a)
3968 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3971 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3972 vreinterpretq_u32_u64 (uint64x2_t __a)
3974 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3977 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3978 vreinterpretq_u32_p8 (poly8x16_t __a)
3980 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3981 __a);
3984 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3985 vreinterpretq_u32_p16 (poly16x8_t __a)
3987 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3990 #define __GET_LOW(__TYPE) \
3991 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
3992 uint64_t lo = vgetq_lane_u64 (tmp, 0); \
3993 return vreinterpret_##__TYPE##_u64 (lo);
3995 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3996 vget_low_f32 (float32x4_t __a)
3998 __GET_LOW (f32);
4001 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4002 vget_low_f64 (float64x2_t __a)
4004 return vgetq_lane_f64 (__a, 0);
4007 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4008 vget_low_p8 (poly8x16_t __a)
4010 __GET_LOW (p8);
4013 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4014 vget_low_p16 (poly16x8_t __a)
4016 __GET_LOW (p16);
4019 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4020 vget_low_s8 (int8x16_t __a)
4022 __GET_LOW (s8);
4025 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4026 vget_low_s16 (int16x8_t __a)
4028 __GET_LOW (s16);
4031 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4032 vget_low_s32 (int32x4_t __a)
4034 __GET_LOW (s32);
4037 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4038 vget_low_s64 (int64x2_t __a)
4040 return vgetq_lane_s64 (__a, 0);
4043 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4044 vget_low_u8 (uint8x16_t __a)
4046 __GET_LOW (u8);
4049 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4050 vget_low_u16 (uint16x8_t __a)
4052 __GET_LOW (u16);
4055 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4056 vget_low_u32 (uint32x4_t __a)
4058 __GET_LOW (u32);
4061 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4062 vget_low_u64 (uint64x2_t __a)
4064 return vgetq_lane_u64 (__a, 0);
4067 #undef __GET_LOW
4069 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4070 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4072 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4075 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4076 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4078 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4081 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4082 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4084 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4087 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4088 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4090 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
4093 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4094 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4096 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4099 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4100 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4102 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4103 (int8x8_t) __b);
4106 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4107 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4109 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4110 (int16x4_t) __b);
4113 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4114 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4116 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4117 (int32x2_t) __b);
4120 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4121 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4123 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
4124 (int64x1_t) __b);
4127 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4128 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4130 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
4133 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4134 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4136 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4137 (int8x8_t) __b);
4140 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4141 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4143 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4144 (int16x4_t) __b);
4147 /* Start of temporary inline asm implementations. */
4149 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4150 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4152 int8x8_t result;
4153 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4154 : "=w"(result)
4155 : "0"(a), "w"(b), "w"(c)
4156 : /* No clobbers */);
4157 return result;
4160 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4161 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4163 int16x4_t result;
4164 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4165 : "=w"(result)
4166 : "0"(a), "w"(b), "w"(c)
4167 : /* No clobbers */);
4168 return result;
4171 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4172 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4174 int32x2_t result;
4175 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4176 : "=w"(result)
4177 : "0"(a), "w"(b), "w"(c)
4178 : /* No clobbers */);
4179 return result;
4182 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4183 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4185 uint8x8_t result;
4186 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4187 : "=w"(result)
4188 : "0"(a), "w"(b), "w"(c)
4189 : /* No clobbers */);
4190 return result;
4193 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4194 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4196 uint16x4_t result;
4197 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4198 : "=w"(result)
4199 : "0"(a), "w"(b), "w"(c)
4200 : /* No clobbers */);
4201 return result;
4204 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4205 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4207 uint32x2_t result;
4208 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4209 : "=w"(result)
4210 : "0"(a), "w"(b), "w"(c)
4211 : /* No clobbers */);
4212 return result;
4215 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4216 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4218 int16x8_t result;
4219 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4220 : "=w"(result)
4221 : "0"(a), "w"(b), "w"(c)
4222 : /* No clobbers */);
4223 return result;
4226 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4227 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4229 int32x4_t result;
4230 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4231 : "=w"(result)
4232 : "0"(a), "w"(b), "w"(c)
4233 : /* No clobbers */);
4234 return result;
4237 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4238 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4240 int64x2_t result;
4241 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4242 : "=w"(result)
4243 : "0"(a), "w"(b), "w"(c)
4244 : /* No clobbers */);
4245 return result;
4248 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4249 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4251 uint16x8_t result;
4252 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4253 : "=w"(result)
4254 : "0"(a), "w"(b), "w"(c)
4255 : /* No clobbers */);
4256 return result;
4259 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4260 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4262 uint32x4_t result;
4263 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4264 : "=w"(result)
4265 : "0"(a), "w"(b), "w"(c)
4266 : /* No clobbers */);
4267 return result;
4270 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4271 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4273 uint64x2_t result;
4274 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4275 : "=w"(result)
4276 : "0"(a), "w"(b), "w"(c)
4277 : /* No clobbers */);
4278 return result;
4281 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4282 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4284 int16x8_t result;
4285 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4286 : "=w"(result)
4287 : "0"(a), "w"(b), "w"(c)
4288 : /* No clobbers */);
4289 return result;
4292 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4293 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4295 int32x4_t result;
4296 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4297 : "=w"(result)
4298 : "0"(a), "w"(b), "w"(c)
4299 : /* No clobbers */);
4300 return result;
4303 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4304 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4306 int64x2_t result;
4307 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4308 : "=w"(result)
4309 : "0"(a), "w"(b), "w"(c)
4310 : /* No clobbers */);
4311 return result;
4314 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4315 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4317 uint16x8_t result;
4318 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4319 : "=w"(result)
4320 : "0"(a), "w"(b), "w"(c)
4321 : /* No clobbers */);
4322 return result;
4325 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4326 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4328 uint32x4_t result;
4329 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4330 : "=w"(result)
4331 : "0"(a), "w"(b), "w"(c)
4332 : /* No clobbers */);
4333 return result;
4336 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4337 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4339 uint64x2_t result;
4340 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4341 : "=w"(result)
4342 : "0"(a), "w"(b), "w"(c)
4343 : /* No clobbers */);
4344 return result;
4347 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4348 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4350 int8x16_t result;
4351 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4352 : "=w"(result)
4353 : "0"(a), "w"(b), "w"(c)
4354 : /* No clobbers */);
4355 return result;
4358 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4359 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4361 int16x8_t result;
4362 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4363 : "=w"(result)
4364 : "0"(a), "w"(b), "w"(c)
4365 : /* No clobbers */);
4366 return result;
4369 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4370 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4372 int32x4_t result;
4373 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4374 : "=w"(result)
4375 : "0"(a), "w"(b), "w"(c)
4376 : /* No clobbers */);
4377 return result;
4380 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4381 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4383 uint8x16_t result;
4384 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4385 : "=w"(result)
4386 : "0"(a), "w"(b), "w"(c)
4387 : /* No clobbers */);
4388 return result;
4391 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4392 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4394 uint16x8_t result;
4395 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4396 : "=w"(result)
4397 : "0"(a), "w"(b), "w"(c)
4398 : /* No clobbers */);
4399 return result;
4402 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4403 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4405 uint32x4_t result;
4406 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4407 : "=w"(result)
4408 : "0"(a), "w"(b), "w"(c)
4409 : /* No clobbers */);
4410 return result;
4413 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4414 vabd_f32 (float32x2_t a, float32x2_t b)
4416 float32x2_t result;
4417 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4418 : "=w"(result)
4419 : "w"(a), "w"(b)
4420 : /* No clobbers */);
4421 return result;
4424 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4425 vabd_s8 (int8x8_t a, int8x8_t b)
4427 int8x8_t result;
4428 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4429 : "=w"(result)
4430 : "w"(a), "w"(b)
4431 : /* No clobbers */);
4432 return result;
4435 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4436 vabd_s16 (int16x4_t a, int16x4_t b)
4438 int16x4_t result;
4439 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4440 : "=w"(result)
4441 : "w"(a), "w"(b)
4442 : /* No clobbers */);
4443 return result;
4446 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4447 vabd_s32 (int32x2_t a, int32x2_t b)
4449 int32x2_t result;
4450 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4451 : "=w"(result)
4452 : "w"(a), "w"(b)
4453 : /* No clobbers */);
4454 return result;
4457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4458 vabd_u8 (uint8x8_t a, uint8x8_t b)
4460 uint8x8_t result;
4461 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4462 : "=w"(result)
4463 : "w"(a), "w"(b)
4464 : /* No clobbers */);
4465 return result;
4468 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4469 vabd_u16 (uint16x4_t a, uint16x4_t b)
4471 uint16x4_t result;
4472 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4473 : "=w"(result)
4474 : "w"(a), "w"(b)
4475 : /* No clobbers */);
4476 return result;
4479 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4480 vabd_u32 (uint32x2_t a, uint32x2_t b)
4482 uint32x2_t result;
4483 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4484 : "=w"(result)
4485 : "w"(a), "w"(b)
4486 : /* No clobbers */);
4487 return result;
4490 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4491 vabdd_f64 (float64_t a, float64_t b)
4493 float64_t result;
4494 __asm__ ("fabd %d0, %d1, %d2"
4495 : "=w"(result)
4496 : "w"(a), "w"(b)
4497 : /* No clobbers */);
4498 return result;
4501 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4502 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4504 int16x8_t result;
4505 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4506 : "=w"(result)
4507 : "w"(a), "w"(b)
4508 : /* No clobbers */);
4509 return result;
4512 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4513 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4515 int32x4_t result;
4516 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4517 : "=w"(result)
4518 : "w"(a), "w"(b)
4519 : /* No clobbers */);
4520 return result;
4523 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4524 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4526 int64x2_t result;
4527 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4528 : "=w"(result)
4529 : "w"(a), "w"(b)
4530 : /* No clobbers */);
4531 return result;
4534 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4535 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4537 uint16x8_t result;
4538 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4539 : "=w"(result)
4540 : "w"(a), "w"(b)
4541 : /* No clobbers */);
4542 return result;
4545 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4546 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4548 uint32x4_t result;
4549 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4550 : "=w"(result)
4551 : "w"(a), "w"(b)
4552 : /* No clobbers */);
4553 return result;
4556 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4557 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4559 uint64x2_t result;
4560 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4561 : "=w"(result)
4562 : "w"(a), "w"(b)
4563 : /* No clobbers */);
4564 return result;
4567 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4568 vabdl_s8 (int8x8_t a, int8x8_t b)
4570 int16x8_t result;
4571 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4572 : "=w"(result)
4573 : "w"(a), "w"(b)
4574 : /* No clobbers */);
4575 return result;
4578 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4579 vabdl_s16 (int16x4_t a, int16x4_t b)
4581 int32x4_t result;
4582 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4583 : "=w"(result)
4584 : "w"(a), "w"(b)
4585 : /* No clobbers */);
4586 return result;
4589 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4590 vabdl_s32 (int32x2_t a, int32x2_t b)
4592 int64x2_t result;
4593 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4594 : "=w"(result)
4595 : "w"(a), "w"(b)
4596 : /* No clobbers */);
4597 return result;
4600 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4601 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4603 uint16x8_t result;
4604 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4605 : "=w"(result)
4606 : "w"(a), "w"(b)
4607 : /* No clobbers */);
4608 return result;
4611 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4612 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4614 uint32x4_t result;
4615 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4616 : "=w"(result)
4617 : "w"(a), "w"(b)
4618 : /* No clobbers */);
4619 return result;
4622 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4623 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4625 uint64x2_t result;
4626 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4627 : "=w"(result)
4628 : "w"(a), "w"(b)
4629 : /* No clobbers */);
4630 return result;
4633 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4634 vabdq_f32 (float32x4_t a, float32x4_t b)
4636 float32x4_t result;
4637 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4638 : "=w"(result)
4639 : "w"(a), "w"(b)
4640 : /* No clobbers */);
4641 return result;
4644 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4645 vabdq_f64 (float64x2_t a, float64x2_t b)
4647 float64x2_t result;
4648 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4649 : "=w"(result)
4650 : "w"(a), "w"(b)
4651 : /* No clobbers */);
4652 return result;
4655 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4656 vabdq_s8 (int8x16_t a, int8x16_t b)
4658 int8x16_t result;
4659 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4660 : "=w"(result)
4661 : "w"(a), "w"(b)
4662 : /* No clobbers */);
4663 return result;
4666 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4667 vabdq_s16 (int16x8_t a, int16x8_t b)
4669 int16x8_t result;
4670 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4671 : "=w"(result)
4672 : "w"(a), "w"(b)
4673 : /* No clobbers */);
4674 return result;
4677 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4678 vabdq_s32 (int32x4_t a, int32x4_t b)
4680 int32x4_t result;
4681 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4682 : "=w"(result)
4683 : "w"(a), "w"(b)
4684 : /* No clobbers */);
4685 return result;
4688 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4689 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4691 uint8x16_t result;
4692 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4693 : "=w"(result)
4694 : "w"(a), "w"(b)
4695 : /* No clobbers */);
4696 return result;
4699 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4700 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4702 uint16x8_t result;
4703 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4704 : "=w"(result)
4705 : "w"(a), "w"(b)
4706 : /* No clobbers */);
4707 return result;
4710 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4711 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4713 uint32x4_t result;
4714 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4715 : "=w"(result)
4716 : "w"(a), "w"(b)
4717 : /* No clobbers */);
4718 return result;
4721 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4722 vabds_f32 (float32_t a, float32_t b)
4724 float32_t result;
4725 __asm__ ("fabd %s0, %s1, %s2"
4726 : "=w"(result)
4727 : "w"(a), "w"(b)
4728 : /* No clobbers */);
4729 return result;
4732 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4733 vaddlv_s8 (int8x8_t a)
4735 int16_t result;
4736 __asm__ ("saddlv %h0,%1.8b"
4737 : "=w"(result)
4738 : "w"(a)
4739 : /* No clobbers */);
4740 return result;
4743 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4744 vaddlv_s16 (int16x4_t a)
4746 int32_t result;
4747 __asm__ ("saddlv %s0,%1.4h"
4748 : "=w"(result)
4749 : "w"(a)
4750 : /* No clobbers */);
4751 return result;
4754 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4755 vaddlv_u8 (uint8x8_t a)
4757 uint16_t result;
4758 __asm__ ("uaddlv %h0,%1.8b"
4759 : "=w"(result)
4760 : "w"(a)
4761 : /* No clobbers */);
4762 return result;
4765 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4766 vaddlv_u16 (uint16x4_t a)
4768 uint32_t result;
4769 __asm__ ("uaddlv %s0,%1.4h"
4770 : "=w"(result)
4771 : "w"(a)
4772 : /* No clobbers */);
4773 return result;
4776 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4777 vaddlvq_s8 (int8x16_t a)
4779 int16_t result;
4780 __asm__ ("saddlv %h0,%1.16b"
4781 : "=w"(result)
4782 : "w"(a)
4783 : /* No clobbers */);
4784 return result;
4787 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4788 vaddlvq_s16 (int16x8_t a)
4790 int32_t result;
4791 __asm__ ("saddlv %s0,%1.8h"
4792 : "=w"(result)
4793 : "w"(a)
4794 : /* No clobbers */);
4795 return result;
4798 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
4799 vaddlvq_s32 (int32x4_t a)
4801 int64_t result;
4802 __asm__ ("saddlv %d0,%1.4s"
4803 : "=w"(result)
4804 : "w"(a)
4805 : /* No clobbers */);
4806 return result;
4809 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4810 vaddlvq_u8 (uint8x16_t a)
4812 uint16_t result;
4813 __asm__ ("uaddlv %h0,%1.16b"
4814 : "=w"(result)
4815 : "w"(a)
4816 : /* No clobbers */);
4817 return result;
4820 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4821 vaddlvq_u16 (uint16x8_t a)
4823 uint32_t result;
4824 __asm__ ("uaddlv %s0,%1.8h"
4825 : "=w"(result)
4826 : "w"(a)
4827 : /* No clobbers */);
4828 return result;
4831 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
4832 vaddlvq_u32 (uint32x4_t a)
4834 uint64_t result;
4835 __asm__ ("uaddlv %d0,%1.4s"
4836 : "=w"(result)
4837 : "w"(a)
4838 : /* No clobbers */);
4839 return result;
4842 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4843 vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
4845 float32x2_t result;
4846 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4847 : "=w"(result)
4848 : "0"(a), "w"(b), "w"(c)
4849 : /* No clobbers */);
4850 return result;
4853 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4854 vbsl_p8 (uint8x8_t a, poly8x8_t b, poly8x8_t c)
4856 poly8x8_t result;
4857 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4858 : "=w"(result)
4859 : "0"(a), "w"(b), "w"(c)
4860 : /* No clobbers */);
4861 return result;
4864 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4865 vbsl_p16 (uint16x4_t a, poly16x4_t b, poly16x4_t c)
4867 poly16x4_t result;
4868 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4869 : "=w"(result)
4870 : "0"(a), "w"(b), "w"(c)
4871 : /* No clobbers */);
4872 return result;
4875 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4876 vbsl_s8 (uint8x8_t a, int8x8_t b, int8x8_t c)
4878 int8x8_t result;
4879 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4880 : "=w"(result)
4881 : "0"(a), "w"(b), "w"(c)
4882 : /* No clobbers */);
4883 return result;
4886 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4887 vbsl_s16 (uint16x4_t a, int16x4_t b, int16x4_t c)
4889 int16x4_t result;
4890 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4891 : "=w"(result)
4892 : "0"(a), "w"(b), "w"(c)
4893 : /* No clobbers */);
4894 return result;
4897 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4898 vbsl_s32 (uint32x2_t a, int32x2_t b, int32x2_t c)
4900 int32x2_t result;
4901 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4902 : "=w"(result)
4903 : "0"(a), "w"(b), "w"(c)
4904 : /* No clobbers */);
4905 return result;
4908 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4909 vbsl_s64 (uint64x1_t a, int64x1_t b, int64x1_t c)
4911 int64x1_t result;
4912 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4913 : "=w"(result)
4914 : "0"(a), "w"(b), "w"(c)
4915 : /* No clobbers */);
4916 return result;
4919 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4920 vbsl_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4922 uint8x8_t result;
4923 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4924 : "=w"(result)
4925 : "0"(a), "w"(b), "w"(c)
4926 : /* No clobbers */);
4927 return result;
4930 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4931 vbsl_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4933 uint16x4_t result;
4934 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4935 : "=w"(result)
4936 : "0"(a), "w"(b), "w"(c)
4937 : /* No clobbers */);
4938 return result;
4941 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4942 vbsl_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4944 uint32x2_t result;
4945 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4946 : "=w"(result)
4947 : "0"(a), "w"(b), "w"(c)
4948 : /* No clobbers */);
4949 return result;
4952 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4953 vbsl_u64 (uint64x1_t a, uint64x1_t b, uint64x1_t c)
4955 uint64x1_t result;
4956 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4957 : "=w"(result)
4958 : "0"(a), "w"(b), "w"(c)
4959 : /* No clobbers */);
4960 return result;
4963 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4964 vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c)
4966 float32x4_t result;
4967 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4968 : "=w"(result)
4969 : "0"(a), "w"(b), "w"(c)
4970 : /* No clobbers */);
4971 return result;
4974 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4975 vbslq_f64 (uint64x2_t a, float64x2_t b, float64x2_t c)
4977 float64x2_t result;
4978 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4979 : "=w"(result)
4980 : "0"(a), "w"(b), "w"(c)
4981 : /* No clobbers */);
4982 return result;
4985 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4986 vbslq_p8 (uint8x16_t a, poly8x16_t b, poly8x16_t c)
4988 poly8x16_t result;
4989 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4990 : "=w"(result)
4991 : "0"(a), "w"(b), "w"(c)
4992 : /* No clobbers */);
4993 return result;
4996 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4997 vbslq_p16 (uint16x8_t a, poly16x8_t b, poly16x8_t c)
4999 poly16x8_t result;
5000 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5001 : "=w"(result)
5002 : "0"(a), "w"(b), "w"(c)
5003 : /* No clobbers */);
5004 return result;
5007 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5008 vbslq_s8 (uint8x16_t a, int8x16_t b, int8x16_t c)
5010 int8x16_t result;
5011 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5012 : "=w"(result)
5013 : "0"(a), "w"(b), "w"(c)
5014 : /* No clobbers */);
5015 return result;
5018 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5019 vbslq_s16 (uint16x8_t a, int16x8_t b, int16x8_t c)
5021 int16x8_t result;
5022 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5023 : "=w"(result)
5024 : "0"(a), "w"(b), "w"(c)
5025 : /* No clobbers */);
5026 return result;
5029 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5030 vbslq_s32 (uint32x4_t a, int32x4_t b, int32x4_t c)
5032 int32x4_t result;
5033 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5034 : "=w"(result)
5035 : "0"(a), "w"(b), "w"(c)
5036 : /* No clobbers */);
5037 return result;
5040 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5041 vbslq_s64 (uint64x2_t a, int64x2_t b, int64x2_t c)
5043 int64x2_t result;
5044 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5045 : "=w"(result)
5046 : "0"(a), "w"(b), "w"(c)
5047 : /* No clobbers */);
5048 return result;
5051 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5052 vbslq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
5054 uint8x16_t result;
5055 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5056 : "=w"(result)
5057 : "0"(a), "w"(b), "w"(c)
5058 : /* No clobbers */);
5059 return result;
5062 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5063 vbslq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
5065 uint16x8_t result;
5066 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5067 : "=w"(result)
5068 : "0"(a), "w"(b), "w"(c)
5069 : /* No clobbers */);
5070 return result;
5073 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5074 vbslq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
5076 uint32x4_t result;
5077 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5078 : "=w"(result)
5079 : "0"(a), "w"(b), "w"(c)
5080 : /* No clobbers */);
5081 return result;
5084 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5085 vbslq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
5087 uint64x2_t result;
5088 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5089 : "=w"(result)
5090 : "0"(a), "w"(b), "w"(c)
5091 : /* No clobbers */);
5092 return result;
5095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5096 vcls_s8 (int8x8_t a)
5098 int8x8_t result;
5099 __asm__ ("cls %0.8b,%1.8b"
5100 : "=w"(result)
5101 : "w"(a)
5102 : /* No clobbers */);
5103 return result;
5106 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5107 vcls_s16 (int16x4_t a)
5109 int16x4_t result;
5110 __asm__ ("cls %0.4h,%1.4h"
5111 : "=w"(result)
5112 : "w"(a)
5113 : /* No clobbers */);
5114 return result;
5117 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5118 vcls_s32 (int32x2_t a)
5120 int32x2_t result;
5121 __asm__ ("cls %0.2s,%1.2s"
5122 : "=w"(result)
5123 : "w"(a)
5124 : /* No clobbers */);
5125 return result;
5128 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5129 vclsq_s8 (int8x16_t a)
5131 int8x16_t result;
5132 __asm__ ("cls %0.16b,%1.16b"
5133 : "=w"(result)
5134 : "w"(a)
5135 : /* No clobbers */);
5136 return result;
5139 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5140 vclsq_s16 (int16x8_t a)
5142 int16x8_t result;
5143 __asm__ ("cls %0.8h,%1.8h"
5144 : "=w"(result)
5145 : "w"(a)
5146 : /* No clobbers */);
5147 return result;
5150 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5151 vclsq_s32 (int32x4_t a)
5153 int32x4_t result;
5154 __asm__ ("cls %0.4s,%1.4s"
5155 : "=w"(result)
5156 : "w"(a)
5157 : /* No clobbers */);
5158 return result;
5161 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5162 vcnt_p8 (poly8x8_t a)
5164 poly8x8_t result;
5165 __asm__ ("cnt %0.8b,%1.8b"
5166 : "=w"(result)
5167 : "w"(a)
5168 : /* No clobbers */);
5169 return result;
5172 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5173 vcnt_s8 (int8x8_t a)
5175 int8x8_t result;
5176 __asm__ ("cnt %0.8b,%1.8b"
5177 : "=w"(result)
5178 : "w"(a)
5179 : /* No clobbers */);
5180 return result;
5183 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5184 vcnt_u8 (uint8x8_t a)
5186 uint8x8_t result;
5187 __asm__ ("cnt %0.8b,%1.8b"
5188 : "=w"(result)
5189 : "w"(a)
5190 : /* No clobbers */);
5191 return result;
5194 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5195 vcntq_p8 (poly8x16_t a)
5197 poly8x16_t result;
5198 __asm__ ("cnt %0.16b,%1.16b"
5199 : "=w"(result)
5200 : "w"(a)
5201 : /* No clobbers */);
5202 return result;
5205 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5206 vcntq_s8 (int8x16_t a)
5208 int8x16_t result;
5209 __asm__ ("cnt %0.16b,%1.16b"
5210 : "=w"(result)
5211 : "w"(a)
5212 : /* No clobbers */);
5213 return result;
5216 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5217 vcntq_u8 (uint8x16_t a)
5219 uint8x16_t result;
5220 __asm__ ("cnt %0.16b,%1.16b"
5221 : "=w"(result)
5222 : "w"(a)
5223 : /* No clobbers */);
5224 return result;
/* Copy lane D of vector C into lane B of vector A (AArch64 INS).
   Implemented as macros because the lane numbers must be immediates
   ("i" constraints).  */

#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
5383 /* vcvt_f16_f32 not supported */
5385 /* vcvt_f32_f16 not supported */
5387 /* vcvt_high_f16_f32 not supported */
5389 /* vcvt_high_f32_f16 not supported */
5391 static float32x2_t vdup_n_f32 (float32_t);
5393 #define vcvt_n_f32_s32(a, b) \
5394 __extension__ \
5395 ({ \
5396 int32x2_t a_ = (a); \
5397 float32x2_t result; \
5398 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5399 : "=w"(result) \
5400 : "w"(a_), "i"(b) \
5401 : /* No clobbers */); \
5402 result; \
5405 #define vcvt_n_f32_u32(a, b) \
5406 __extension__ \
5407 ({ \
5408 uint32x2_t a_ = (a); \
5409 float32x2_t result; \
5410 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5411 : "=w"(result) \
5412 : "w"(a_), "i"(b) \
5413 : /* No clobbers */); \
5414 result; \
5417 #define vcvt_n_s32_f32(a, b) \
5418 __extension__ \
5419 ({ \
5420 float32x2_t a_ = (a); \
5421 int32x2_t result; \
5422 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
5423 : "=w"(result) \
5424 : "w"(a_), "i"(b) \
5425 : /* No clobbers */); \
5426 result; \
5429 #define vcvt_n_u32_f32(a, b) \
5430 __extension__ \
5431 ({ \
5432 float32x2_t a_ = (a); \
5433 uint32x2_t result; \
5434 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
5435 : "=w"(result) \
5436 : "w"(a_), "i"(b) \
5437 : /* No clobbers */); \
5438 result; \
5441 #define vcvtd_n_f64_s64(a, b) \
5442 __extension__ \
5443 ({ \
5444 int64_t a_ = (a); \
5445 float64_t result; \
5446 __asm__ ("scvtf %d0,%d1,%2" \
5447 : "=w"(result) \
5448 : "w"(a_), "i"(b) \
5449 : /* No clobbers */); \
5450 result; \
5453 #define vcvtd_n_f64_u64(a, b) \
5454 __extension__ \
5455 ({ \
5456 uint64_t a_ = (a); \
5457 float64_t result; \
5458 __asm__ ("ucvtf %d0,%d1,%2" \
5459 : "=w"(result) \
5460 : "w"(a_), "i"(b) \
5461 : /* No clobbers */); \
5462 result; \
5465 #define vcvtd_n_s64_f64(a, b) \
5466 __extension__ \
5467 ({ \
5468 float64_t a_ = (a); \
5469 int64_t result; \
5470 __asm__ ("fcvtzs %d0,%d1,%2" \
5471 : "=w"(result) \
5472 : "w"(a_), "i"(b) \
5473 : /* No clobbers */); \
5474 result; \
5477 #define vcvtd_n_u64_f64(a, b) \
5478 __extension__ \
5479 ({ \
5480 float64_t a_ = (a); \
5481 uint64_t result; \
5482 __asm__ ("fcvtzu %d0,%d1,%2" \
5483 : "=w"(result) \
5484 : "w"(a_), "i"(b) \
5485 : /* No clobbers */); \
5486 result; \
/* Fixed-point <-> floating-point conversions with B fraction bits,
   128-bit vectors (AArch64 SCVTF/UCVTF/FCVTZS/FCVTZU).  */

#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar single-precision fixed-point <-> floating-point conversions
   with B fraction bits (AArch64 SCVTF/UCVTF/FCVTZS/FCVTZU).  */

#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5633 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5634 vcvtx_f32_f64 (float64x2_t a)
5636 float32x2_t result;
5637 __asm__ ("fcvtxn %0.2s,%1.2d"
5638 : "=w"(result)
5639 : "w"(a)
5640 : /* No clobbers */);
5641 return result;
5644 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5645 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5647 float32x4_t result;
5648 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5649 : "=w"(result)
5650 : "w" (b), "0"(a)
5651 : /* No clobbers */);
5652 return result;
5655 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5656 vcvtxd_f32_f64 (float64_t a)
5658 float32_t result;
5659 __asm__ ("fcvtxn %s0,%d1"
5660 : "=w"(result)
5661 : "w"(a)
5662 : /* No clobbers */);
5663 return result;
/* vext: extract a 64-bit vector from the byte-wise concatenation of A
   and B, starting at element C.  C must be a compile-time constant;
   the element index is scaled to a byte offset in the asm template
   (#%3*2 / *4 / *8 for 16/32/64-bit elements).  */

#define vext_f32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_f64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64x1_t a_ = (a);                                            \
       float64x1_t result;                                              \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_p8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_p16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8x8_t a_ = (a);                                               \
       int8x8_t result;                                                 \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64x1_t a_ = (a);                                              \
       int64x1_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x8_t result;                                                \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64x1_t a_ = (a);                                             \
       uint64x1_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vextq: 128-bit variant of vext — extract a vector from the
   concatenation of A and B starting at element C (constant), with the
   element index scaled to a byte offset in the asm template.  */

#define vextq_f32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_f64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_p8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_p16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5978 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5979 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5981 float32x2_t result;
5982 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5983 : "=w"(result)
5984 : "0"(a), "w"(b), "w"(c)
5985 : /* No clobbers */);
5986 return result;
5989 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5990 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5992 float32x4_t result;
5993 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5994 : "=w"(result)
5995 : "0"(a), "w"(b), "w"(c)
5996 : /* No clobbers */);
5997 return result;
6000 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6001 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6003 float64x2_t result;
6004 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
6005 : "=w"(result)
6006 : "0"(a), "w"(b), "w"(c)
6007 : /* No clobbers */);
6008 return result;
6011 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6012 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6014 float32x2_t result;
6015 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
6016 : "=w"(result)
6017 : "0"(a), "w"(b), "w"(c)
6018 : /* No clobbers */);
6019 return result;
6022 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6023 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6025 float32x4_t result;
6026 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
6027 : "=w"(result)
6028 : "0"(a), "w"(b), "w"(c)
6029 : /* No clobbers */);
6030 return result;
6033 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6034 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
6036 float64x2_t result;
6037 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
6038 : "=w"(result)
6039 : "0"(a), "w"(b), "w"(c)
6040 : /* No clobbers */);
6041 return result;
6044 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6045 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6047 float32x2_t result;
6048 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
6049 : "=w"(result)
6050 : "0"(a), "w"(b), "w"(c)
6051 : /* No clobbers */);
6052 return result;
6055 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6056 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6058 float32x4_t result;
6059 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
6060 : "=w"(result)
6061 : "0"(a), "w"(b), "w"(c)
6062 : /* No clobbers */);
6063 return result;
6066 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6067 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6069 float64x2_t result;
6070 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
6071 : "=w"(result)
6072 : "0"(a), "w"(b), "w"(c)
6073 : /* No clobbers */);
6074 return result;
6077 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6078 vget_high_f32 (float32x4_t a)
6080 float32x2_t result;
6081 __asm__ ("ins %0.d[0], %1.d[1]"
6082 : "=w"(result)
6083 : "w"(a)
6084 : /* No clobbers */);
6085 return result;
6088 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6089 vget_high_f64 (float64x2_t a)
6091 float64x1_t result;
6092 __asm__ ("ins %0.d[0], %1.d[1]"
6093 : "=w"(result)
6094 : "w"(a)
6095 : /* No clobbers */);
6096 return result;
6099 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6100 vget_high_p8 (poly8x16_t a)
6102 poly8x8_t result;
6103 __asm__ ("ins %0.d[0], %1.d[1]"
6104 : "=w"(result)
6105 : "w"(a)
6106 : /* No clobbers */);
6107 return result;
6110 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6111 vget_high_p16 (poly16x8_t a)
6113 poly16x4_t result;
6114 __asm__ ("ins %0.d[0], %1.d[1]"
6115 : "=w"(result)
6116 : "w"(a)
6117 : /* No clobbers */);
6118 return result;
6121 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6122 vget_high_s8 (int8x16_t a)
6124 int8x8_t result;
6125 __asm__ ("ins %0.d[0], %1.d[1]"
6126 : "=w"(result)
6127 : "w"(a)
6128 : /* No clobbers */);
6129 return result;
6132 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6133 vget_high_s16 (int16x8_t a)
6135 int16x4_t result;
6136 __asm__ ("ins %0.d[0], %1.d[1]"
6137 : "=w"(result)
6138 : "w"(a)
6139 : /* No clobbers */);
6140 return result;
6143 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6144 vget_high_s32 (int32x4_t a)
6146 int32x2_t result;
6147 __asm__ ("ins %0.d[0], %1.d[1]"
6148 : "=w"(result)
6149 : "w"(a)
6150 : /* No clobbers */);
6151 return result;
6154 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6155 vget_high_s64 (int64x2_t a)
6157 int64x1_t result;
6158 __asm__ ("ins %0.d[0], %1.d[1]"
6159 : "=w"(result)
6160 : "w"(a)
6161 : /* No clobbers */);
6162 return result;
6165 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6166 vget_high_u8 (uint8x16_t a)
6168 uint8x8_t result;
6169 __asm__ ("ins %0.d[0], %1.d[1]"
6170 : "=w"(result)
6171 : "w"(a)
6172 : /* No clobbers */);
6173 return result;
6176 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6177 vget_high_u16 (uint16x8_t a)
6179 uint16x4_t result;
6180 __asm__ ("ins %0.d[0], %1.d[1]"
6181 : "=w"(result)
6182 : "w"(a)
6183 : /* No clobbers */);
6184 return result;
6187 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6188 vget_high_u32 (uint32x4_t a)
6190 uint32x2_t result;
6191 __asm__ ("ins %0.d[0], %1.d[1]"
6192 : "=w"(result)
6193 : "w"(a)
6194 : /* No clobbers */);
6195 return result;
6198 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6199 vget_high_u64 (uint64x2_t a)
6201 uint64x1_t result;
6202 __asm__ ("ins %0.d[0], %1.d[1]"
6203 : "=w"(result)
6204 : "w"(a)
6205 : /* No clobbers */);
6206 return result;
6209 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6210 vhsub_s8 (int8x8_t a, int8x8_t b)
6212 int8x8_t result;
6213 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
6214 : "=w"(result)
6215 : "w"(a), "w"(b)
6216 : /* No clobbers */);
6217 return result;
6220 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6221 vhsub_s16 (int16x4_t a, int16x4_t b)
6223 int16x4_t result;
6224 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
6225 : "=w"(result)
6226 : "w"(a), "w"(b)
6227 : /* No clobbers */);
6228 return result;
6231 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6232 vhsub_s32 (int32x2_t a, int32x2_t b)
6234 int32x2_t result;
6235 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
6236 : "=w"(result)
6237 : "w"(a), "w"(b)
6238 : /* No clobbers */);
6239 return result;
6242 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6243 vhsub_u8 (uint8x8_t a, uint8x8_t b)
6245 uint8x8_t result;
6246 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
6247 : "=w"(result)
6248 : "w"(a), "w"(b)
6249 : /* No clobbers */);
6250 return result;
6253 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6254 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6256 uint16x4_t result;
6257 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6258 : "=w"(result)
6259 : "w"(a), "w"(b)
6260 : /* No clobbers */);
6261 return result;
6264 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6265 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6267 uint32x2_t result;
6268 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6269 : "=w"(result)
6270 : "w"(a), "w"(b)
6271 : /* No clobbers */);
6272 return result;
6275 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6276 vhsubq_s8 (int8x16_t a, int8x16_t b)
6278 int8x16_t result;
6279 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6280 : "=w"(result)
6281 : "w"(a), "w"(b)
6282 : /* No clobbers */);
6283 return result;
6286 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6287 vhsubq_s16 (int16x8_t a, int16x8_t b)
6289 int16x8_t result;
6290 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6291 : "=w"(result)
6292 : "w"(a), "w"(b)
6293 : /* No clobbers */);
6294 return result;
6297 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6298 vhsubq_s32 (int32x4_t a, int32x4_t b)
6300 int32x4_t result;
6301 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6302 : "=w"(result)
6303 : "w"(a), "w"(b)
6304 : /* No clobbers */);
6305 return result;
6308 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6309 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6311 uint8x16_t result;
6312 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6313 : "=w"(result)
6314 : "w"(a), "w"(b)
6315 : /* No clobbers */);
6316 return result;
6319 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6320 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6322 uint16x8_t result;
6323 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6324 : "=w"(result)
6325 : "w"(a), "w"(b)
6326 : /* No clobbers */);
6327 return result;
6330 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6331 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6333 uint32x4_t result;
6334 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6335 : "=w"(result)
6336 : "w"(a), "w"(b)
6337 : /* No clobbers */);
6338 return result;
6341 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6342 vld1_dup_f32 (const float32_t * a)
6344 float32x2_t result;
6345 __asm__ ("ld1r {%0.2s}, %1"
6346 : "=w"(result)
6347 : "Utv"(*a)
6348 : /* No clobbers */);
6349 return result;
6352 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6353 vld1_dup_f64 (const float64_t * a)
6355 float64x1_t result;
6356 __asm__ ("ld1r {%0.1d}, %1"
6357 : "=w"(result)
6358 : "Utv"(*a)
6359 : /* No clobbers */);
6360 return result;
6363 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6364 vld1_dup_p8 (const poly8_t * a)
6366 poly8x8_t result;
6367 __asm__ ("ld1r {%0.8b}, %1"
6368 : "=w"(result)
6369 : "Utv"(*a)
6370 : /* No clobbers */);
6371 return result;
6374 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6375 vld1_dup_p16 (const poly16_t * a)
6377 poly16x4_t result;
6378 __asm__ ("ld1r {%0.4h}, %1"
6379 : "=w"(result)
6380 : "Utv"(*a)
6381 : /* No clobbers */);
6382 return result;
6385 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6386 vld1_dup_s8 (const int8_t * a)
6388 int8x8_t result;
6389 __asm__ ("ld1r {%0.8b}, %1"
6390 : "=w"(result)
6391 : "Utv"(*a)
6392 : /* No clobbers */);
6393 return result;
6396 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6397 vld1_dup_s16 (const int16_t * a)
6399 int16x4_t result;
6400 __asm__ ("ld1r {%0.4h}, %1"
6401 : "=w"(result)
6402 : "Utv"(*a)
6403 : /* No clobbers */);
6404 return result;
6407 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6408 vld1_dup_s32 (const int32_t * a)
6410 int32x2_t result;
6411 __asm__ ("ld1r {%0.2s}, %1"
6412 : "=w"(result)
6413 : "Utv"(*a)
6414 : /* No clobbers */);
6415 return result;
6418 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6419 vld1_dup_s64 (const int64_t * a)
6421 int64x1_t result;
6422 __asm__ ("ld1r {%0.1d}, %1"
6423 : "=w"(result)
6424 : "Utv"(*a)
6425 : /* No clobbers */);
6426 return result;
6429 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6430 vld1_dup_u8 (const uint8_t * a)
6432 uint8x8_t result;
6433 __asm__ ("ld1r {%0.8b}, %1"
6434 : "=w"(result)
6435 : "Utv"(*a)
6436 : /* No clobbers */);
6437 return result;
6440 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6441 vld1_dup_u16 (const uint16_t * a)
6443 uint16x4_t result;
6444 __asm__ ("ld1r {%0.4h}, %1"
6445 : "=w"(result)
6446 : "Utv"(*a)
6447 : /* No clobbers */);
6448 return result;
6451 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6452 vld1_dup_u32 (const uint32_t * a)
6454 uint32x2_t result;
6455 __asm__ ("ld1r {%0.2s}, %1"
6456 : "=w"(result)
6457 : "Utv"(*a)
6458 : /* No clobbers */);
6459 return result;
6462 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6463 vld1_dup_u64 (const uint64_t * a)
6465 uint64x1_t result;
6466 __asm__ ("ld1r {%0.1d}, %1"
6467 : "=w"(result)
6468 : "Utv"(*a)
6469 : /* No clobbers */);
6470 return result;
/* vld1_lane: load a single element from *A into lane C of vector B,
   leaving the other lanes unchanged (B is tied to the output via
   "0").  C must be a compile-time constant.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6629 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6630 vld1q_dup_f32 (const float32_t * a)
6632 float32x4_t result;
6633 __asm__ ("ld1r {%0.4s}, %1"
6634 : "=w"(result)
6635 : "Utv"(*a)
6636 : /* No clobbers */);
6637 return result;
6640 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6641 vld1q_dup_f64 (const float64_t * a)
6643 float64x2_t result;
6644 __asm__ ("ld1r {%0.2d}, %1"
6645 : "=w"(result)
6646 : "Utv"(*a)
6647 : /* No clobbers */);
6648 return result;
6651 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6652 vld1q_dup_p8 (const poly8_t * a)
6654 poly8x16_t result;
6655 __asm__ ("ld1r {%0.16b}, %1"
6656 : "=w"(result)
6657 : "Utv"(*a)
6658 : /* No clobbers */);
6659 return result;
6662 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6663 vld1q_dup_p16 (const poly16_t * a)
6665 poly16x8_t result;
6666 __asm__ ("ld1r {%0.8h}, %1"
6667 : "=w"(result)
6668 : "Utv"(*a)
6669 : /* No clobbers */);
6670 return result;
6673 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6674 vld1q_dup_s8 (const int8_t * a)
6676 int8x16_t result;
6677 __asm__ ("ld1r {%0.16b}, %1"
6678 : "=w"(result)
6679 : "Utv"(*a)
6680 : /* No clobbers */);
6681 return result;
6684 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6685 vld1q_dup_s16 (const int16_t * a)
6687 int16x8_t result;
6688 __asm__ ("ld1r {%0.8h}, %1"
6689 : "=w"(result)
6690 : "Utv"(*a)
6691 : /* No clobbers */);
6692 return result;
6695 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6696 vld1q_dup_s32 (const int32_t * a)
6698 int32x4_t result;
6699 __asm__ ("ld1r {%0.4s}, %1"
6700 : "=w"(result)
6701 : "Utv"(*a)
6702 : /* No clobbers */);
6703 return result;
6706 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6707 vld1q_dup_s64 (const int64_t * a)
6709 int64x2_t result;
6710 __asm__ ("ld1r {%0.2d}, %1"
6711 : "=w"(result)
6712 : "Utv"(*a)
6713 : /* No clobbers */);
6714 return result;
6717 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6718 vld1q_dup_u8 (const uint8_t * a)
6720 uint8x16_t result;
6721 __asm__ ("ld1r {%0.16b}, %1"
6722 : "=w"(result)
6723 : "Utv"(*a)
6724 : /* No clobbers */);
6725 return result;
6728 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6729 vld1q_dup_u16 (const uint16_t * a)
6731 uint16x8_t result;
6732 __asm__ ("ld1r {%0.8h}, %1"
6733 : "=w"(result)
6734 : "Utv"(*a)
6735 : /* No clobbers */);
6736 return result;
6739 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6740 vld1q_dup_u32 (const uint32_t * a)
6742 uint32x4_t result;
6743 __asm__ ("ld1r {%0.4s}, %1"
6744 : "=w"(result)
6745 : "Utv"(*a)
6746 : /* No clobbers */);
6747 return result;
6750 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6751 vld1q_dup_u64 (const uint64_t * a)
6753 uint64x2_t result;
6754 __asm__ ("ld1r {%0.2d}, %1"
6755 : "=w"(result)
6756 : "Utv"(*a)
6757 : /* No clobbers */);
6758 return result;
/* vld1q_lane: 128-bit variant — load a single element from *A into
   lane C of vector B, other lanes preserved via the "0" tie.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmla_* -- vector multiply-accumulate, 64-bit ("D" register) forms.
   Each intrinsic computes a + b * c element-wise.  The integer forms
   emit a single MLA; the "0"(a) input constraint ties the accumulator
   to output operand %0 because MLA reads and writes the same register.
   The "x" constraint (instead of "w") on 16-bit scalars restricts them
   to V0-V15: AArch64 by-element forms with H-sized elements can only
   index the low 16 vector registers.  */

/* Float variant: multiplies into scratch t1, then adds -- two
   separately rounded operations (FMUL + FADD), not a fused FMLA.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;   /* scratch register for the intermediate product */
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* Vector-by-vector forms: a + b * c per lane.  MLA is sign-agnostic,
   so signed and unsigned variants emit the same instruction.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlal_high_lane(q)_* -- widening multiply-accumulate by lane, using
   the HIGH half of b (SMLAL2/UMLAL2): a + (high half of b) * c[d].
   Macros (not inline functions) because the lane index d must be an
   "i" (immediate) operand.  "x" restricts 16-bit lane vectors to
   V0-V15 as required by H-element by-element forms.
   NOTE(review): these _lane variants take 128-bit lane vectors,
   identical to the _laneq forms below; ACLE specifies 64-bit vectors
   for _lane -- confirm against the intended ACLE revision.  */

#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* _laneq forms: lane vector is a full 128-bit "q" register.  */
#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmlal_high_n_* / vmlal_high_* -- widening multiply-accumulate on the
   HIGH half of the 128-bit sources (SMLAL2/UMLAL2): the result lanes
   are twice the width of the b/c element type.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* Vector-by-vector high-half forms.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlal_lane(q)_* -- widening multiply-accumulate by lane on the LOW
   64-bit sources (SMLAL/UMLAL): a + b * c[d], result lanes twice the
   element width.  Macros so the lane index d stays an "i" immediate.
   _laneq variants index a 128-bit lane vector c.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmlal_n_* / vmlal_* -- widening multiply-accumulate on the LOW
   64-bit sources (SMLAL/UMLAL): a + b * c, widening each product to
   twice the element width before accumulating.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlaq_* -- vector multiply-accumulate, 128-bit ("Q" register) forms.
   Same semantics as the vmla_* D forms, twice the lane count.  Float
   variants are FMUL-into-scratch + FADD (two roundings, not fused).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;   /* scratch for the intermediate product */
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
{
  float64x2_t result;
  float64x2_t t1;   /* scratch for the intermediate product */
  __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmls_* -- vector multiply-subtract, 64-bit ("D" register) forms.
   Each intrinsic computes a - b * c element-wise (MLS; float form uses
   FMUL into a scratch then FSUB, two separately rounded operations).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;   /* scratch for the intermediate product */
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mls %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mls %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mls %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mls %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mls %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mls %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlsl_high_lane(q)_* -- widening multiply-subtract by lane, using
   the HIGH half of b (SMLSL2/UMLSL2): a - (high half of b) * c[d].
   Macros so the lane index d stays an "i" immediate; "x" keeps 16-bit
   lane vectors in V0-V15 as required by H-element by-element forms.
   NOTE(review): as with vmlal_high_lane_*, these _lane variants take
   128-bit lane vectors like _laneq -- ACLE specifies 64-bit; confirm.  */

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmlsl_high_n_* / vmlsl_high_* -- widening multiply-subtract on the
   HIGH half of the 128-bit sources (SMLSL2/UMLSL2): a - b_hi * c.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlsl_lane(q)_* -- widening multiply-subtract by lane on the LOW
   64-bit sources (SMLSL/UMLSL): a - b * c[d].  Macros so the lane
   index d stays an "i" immediate; _laneq indexes a 128-bit c.  */

#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmlsl_n_* / vmlsl_* -- widening multiply-subtract on the LOW 64-bit
   sources (SMLSL/UMLSL): a - b * c, products widened to twice the
   element width before subtracting from the accumulator.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlsq_n_f32 -- 128-bit float multiply-subtract with broadcast scalar:
   a - b * c per lane, via FMUL into scratch t1 then FSUB (two
   separately rounded operations, not a fused FMLS).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;   /* scratch for the intermediate product */
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
8195 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8196 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
8198 float64x2_t result;
8199 float64x2_t t1;
8200 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
8201 : "=w"(result), "=w"(t1)
8202 : "0"(a), "w"(b), "x"(c)
8203 : /* No clobbers */);
8204 return result;
8207 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8208 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
8210 int16x8_t result;
8211 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
8212 : "=w"(result)
8213 : "0"(a), "w"(b), "x"(c)
8214 : /* No clobbers */);
8215 return result;
8218 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8219 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
8221 int32x4_t result;
8222 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8223 : "=w"(result)
8224 : "0"(a), "w"(b), "w"(c)
8225 : /* No clobbers */);
8226 return result;
8229 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8230 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
8232 uint16x8_t result;
8233 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
8234 : "=w"(result)
8235 : "0"(a), "w"(b), "x"(c)
8236 : /* No clobbers */);
8237 return result;
8240 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8241 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
8243 uint32x4_t result;
8244 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8245 : "=w"(result)
8246 : "0"(a), "w"(b), "w"(c)
8247 : /* No clobbers */);
8248 return result;
/* vmlsq_* — quad vector multiply-subtract, element by element:
   a - b * c.  Each is a single MLS instruction; the accumulator `a`
   is tied to the output via the "0" matching constraint.  MLS is the
   same instruction for signed and unsigned element types.  */
8251 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8252 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8254 int8x16_t result;
8255 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8256 : "=w"(result)
8257 : "0"(a), "w"(b), "w"(c)
8258 : /* No clobbers */);
8259 return result;
8262 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8263 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8265 int16x8_t result;
8266 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8267 : "=w"(result)
8268 : "0"(a), "w"(b), "w"(c)
8269 : /* No clobbers */);
8270 return result;
8273 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8274 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8276 int32x4_t result;
8277 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8278 : "=w"(result)
8279 : "0"(a), "w"(b), "w"(c)
8280 : /* No clobbers */);
8281 return result;
/* Unsigned variants: identical MLS encodings, unsigned types only.  */
8284 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8285 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8287 uint8x16_t result;
8288 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8289 : "=w"(result)
8290 : "0"(a), "w"(b), "w"(c)
8291 : /* No clobbers */);
8292 return result;
8295 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8296 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8298 uint16x8_t result;
8299 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8300 : "=w"(result)
8301 : "0"(a), "w"(b), "w"(c)
8302 : /* No clobbers */);
8303 return result;
8306 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8307 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8309 uint32x4_t result;
8310 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8311 : "=w"(result)
8312 : "0"(a), "w"(b), "w"(c)
8313 : /* No clobbers */);
8314 return result;
/* vmov_n_* — broadcast a scalar into every lane of a 64-bit vector.
   All use DUP from a general-purpose register ("r" input, %w1/%x1),
   except the 64-bit-element forms which have only one lane and use
   INS into lane d[0].
   NOTE(review): vmov_n_f32 also passes the float through an "r"
   (general-register) constraint and DUPs from %w1 — presumably the
   compiler moves the FP value into a GPR first; verify codegen.  */
8317 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8318 vmov_n_f32 (float32_t a)
8320 float32x2_t result;
8321 __asm__ ("dup %0.2s, %w1"
8322 : "=w"(result)
8323 : "r"(a)
8324 : /* No clobbers */);
8325 return result;
/* Polynomial 8-bit broadcast (scalar arrives as uint32_t).  */
8328 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8329 vmov_n_p8 (uint32_t a)
8331 poly8x8_t result;
8332 __asm__ ("dup %0.8b,%w1"
8333 : "=w"(result)
8334 : "r"(a)
8335 : /* No clobbers */);
8336 return result;
8339 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
8340 vmov_n_p16 (uint32_t a)
8342 poly16x4_t result;
8343 __asm__ ("dup %0.4h,%w1"
8344 : "=w"(result)
8345 : "r"(a)
8346 : /* No clobbers */);
8347 return result;
/* Signed broadcasts: DUP uses only the low bits of %w1 per lane size.  */
8350 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8351 vmov_n_s8 (int32_t a)
8353 int8x8_t result;
8354 __asm__ ("dup %0.8b,%w1"
8355 : "=w"(result)
8356 : "r"(a)
8357 : /* No clobbers */);
8358 return result;
8361 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8362 vmov_n_s16 (int32_t a)
8364 int16x4_t result;
8365 __asm__ ("dup %0.4h,%w1"
8366 : "=w"(result)
8367 : "r"(a)
8368 : /* No clobbers */);
8369 return result;
8372 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8373 vmov_n_s32 (int32_t a)
8375 int32x2_t result;
8376 __asm__ ("dup %0.2s,%w1"
8377 : "=w"(result)
8378 : "r"(a)
8379 : /* No clobbers */);
8380 return result;
/* int64x1_t is a plain scalar type here; INS writes the single lane
   d[0], which is the entire 64-bit result.  */
8383 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8384 vmov_n_s64 (int64_t a)
8386 int64x1_t result;
8387 __asm__ ("ins %0.d[0],%x1"
8388 : "=w"(result)
8389 : "r"(a)
8390 : /* No clobbers */);
8391 return result;
/* Unsigned broadcasts — same DUP/INS patterns as the signed forms.  */
8394 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8395 vmov_n_u8 (uint32_t a)
8397 uint8x8_t result;
8398 __asm__ ("dup %0.8b,%w1"
8399 : "=w"(result)
8400 : "r"(a)
8401 : /* No clobbers */);
8402 return result;
8405 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8406 vmov_n_u16 (uint32_t a)
8408 uint16x4_t result;
8409 __asm__ ("dup %0.4h,%w1"
8410 : "=w"(result)
8411 : "r"(a)
8412 : /* No clobbers */);
8413 return result;
8416 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8417 vmov_n_u32 (uint32_t a)
8419 uint32x2_t result;
8420 __asm__ ("dup %0.2s,%w1"
8421 : "=w"(result)
8422 : "r"(a)
8423 : /* No clobbers */);
8424 return result;
8427 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8428 vmov_n_u64 (uint64_t a)
8430 uint64x1_t result;
8431 __asm__ ("ins %0.d[0],%x1"
8432 : "=w"(result)
8433 : "r"(a)
8434 : /* No clobbers */);
8435 return result;
/* vmovl_high_* / vmovl_* — lengthening moves: widen each element to
   twice its size.  Implemented as a shift-left-long by #0:
   SSHLL(2) sign-extends, USHLL(2) zero-extends; the "2" forms read
   the upper half of a 128-bit source, the plain forms a 64-bit one.  */
8438 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8439 vmovl_high_s8 (int8x16_t a)
8441 int16x8_t result;
8442 __asm__ ("sshll2 %0.8h,%1.16b,#0"
8443 : "=w"(result)
8444 : "w"(a)
8445 : /* No clobbers */);
8446 return result;
8449 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8450 vmovl_high_s16 (int16x8_t a)
8452 int32x4_t result;
8453 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8454 : "=w"(result)
8455 : "w"(a)
8456 : /* No clobbers */);
8457 return result;
8460 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8461 vmovl_high_s32 (int32x4_t a)
8463 int64x2_t result;
8464 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8465 : "=w"(result)
8466 : "w"(a)
8467 : /* No clobbers */);
8468 return result;
/* Unsigned (zero-extending) upper-half widens.  */
8471 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8472 vmovl_high_u8 (uint8x16_t a)
8474 uint16x8_t result;
8475 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8476 : "=w"(result)
8477 : "w"(a)
8478 : /* No clobbers */);
8479 return result;
8482 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8483 vmovl_high_u16 (uint16x8_t a)
8485 uint32x4_t result;
8486 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8487 : "=w"(result)
8488 : "w"(a)
8489 : /* No clobbers */);
8490 return result;
8493 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8494 vmovl_high_u32 (uint32x4_t a)
8496 uint64x2_t result;
8497 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8498 : "=w"(result)
8499 : "w"(a)
8500 : /* No clobbers */);
8501 return result;
/* 64-bit-source widens: whole input vector, sign-extended.  */
8504 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8505 vmovl_s8 (int8x8_t a)
8507 int16x8_t result;
8508 __asm__ ("sshll %0.8h,%1.8b,#0"
8509 : "=w"(result)
8510 : "w"(a)
8511 : /* No clobbers */);
8512 return result;
8515 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8516 vmovl_s16 (int16x4_t a)
8518 int32x4_t result;
8519 __asm__ ("sshll %0.4s,%1.4h,#0"
8520 : "=w"(result)
8521 : "w"(a)
8522 : /* No clobbers */);
8523 return result;
8526 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8527 vmovl_s32 (int32x2_t a)
8529 int64x2_t result;
8530 __asm__ ("sshll %0.2d,%1.2s,#0"
8531 : "=w"(result)
8532 : "w"(a)
8533 : /* No clobbers */);
8534 return result;
/* 64-bit-source widens, zero-extended.  */
8537 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8538 vmovl_u8 (uint8x8_t a)
8540 uint16x8_t result;
8541 __asm__ ("ushll %0.8h,%1.8b,#0"
8542 : "=w"(result)
8543 : "w"(a)
8544 : /* No clobbers */);
8545 return result;
8548 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8549 vmovl_u16 (uint16x4_t a)
8551 uint32x4_t result;
8552 __asm__ ("ushll %0.4s,%1.4h,#0"
8553 : "=w"(result)
8554 : "w"(a)
8555 : /* No clobbers */);
8556 return result;
8559 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8560 vmovl_u32 (uint32x2_t a)
8562 uint64x2_t result;
8563 __asm__ ("ushll %0.2d,%1.2s,#0"
8564 : "=w"(result)
8565 : "w"(a)
8566 : /* No clobbers */);
8567 return result;
/* vmovn_high_* — narrow `b` and place it in the upper half of the
   result, keeping `a` as the lower half.  XTN2 writes only the upper
   half of its destination, so `result` is pre-seeded with `a` in the
   low half (via vcombine with a zero upper half) and passed as a
   read-write "+w" operand.  vmovn_* below narrow a full 128-bit
   vector into a 64-bit one with plain XTN ("=w" write-only).  */
8570 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8571 vmovn_high_s16 (int8x8_t a, int16x8_t b)
8573 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8574 __asm__ ("xtn2 %0.16b,%1.8h"
8575 : "+w"(result)
8576 : "w"(b)
8577 : /* No clobbers */);
8578 return result;
8581 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8582 vmovn_high_s32 (int16x4_t a, int32x4_t b)
8584 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8585 __asm__ ("xtn2 %0.8h,%1.4s"
8586 : "+w"(result)
8587 : "w"(b)
8588 : /* No clobbers */);
8589 return result;
8592 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8593 vmovn_high_s64 (int32x2_t a, int64x2_t b)
8595 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8596 __asm__ ("xtn2 %0.4s,%1.2d"
8597 : "+w"(result)
8598 : "w"(b)
8599 : /* No clobbers */);
8600 return result;
/* Unsigned high-narrows — XTN2 is sign-agnostic (truncation).  */
8603 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8604 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8606 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8607 __asm__ ("xtn2 %0.16b,%1.8h"
8608 : "+w"(result)
8609 : "w"(b)
8610 : /* No clobbers */);
8611 return result;
8614 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8615 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8617 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8618 __asm__ ("xtn2 %0.8h,%1.4s"
8619 : "+w"(result)
8620 : "w"(b)
8621 : /* No clobbers */);
8622 return result;
8625 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8626 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8628 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8629 __asm__ ("xtn2 %0.4s,%1.2d"
8630 : "+w"(result)
8631 : "w"(b)
8632 : /* No clobbers */);
8633 return result;
/* vmovn_* — truncating narrow of each element to half width (XTN).  */
8636 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8637 vmovn_s16 (int16x8_t a)
8639 int8x8_t result;
8640 __asm__ ("xtn %0.8b,%1.8h"
8641 : "=w"(result)
8642 : "w"(a)
8643 : /* No clobbers */);
8644 return result;
8647 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8648 vmovn_s32 (int32x4_t a)
8650 int16x4_t result;
8651 __asm__ ("xtn %0.4h,%1.4s"
8652 : "=w"(result)
8653 : "w"(a)
8654 : /* No clobbers */);
8655 return result;
8658 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8659 vmovn_s64 (int64x2_t a)
8661 int32x2_t result;
8662 __asm__ ("xtn %0.2s,%1.2d"
8663 : "=w"(result)
8664 : "w"(a)
8665 : /* No clobbers */);
8666 return result;
8669 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8670 vmovn_u16 (uint16x8_t a)
8672 uint8x8_t result;
8673 __asm__ ("xtn %0.8b,%1.8h"
8674 : "=w"(result)
8675 : "w"(a)
8676 : /* No clobbers */);
8677 return result;
8680 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8681 vmovn_u32 (uint32x4_t a)
8683 uint16x4_t result;
8684 __asm__ ("xtn %0.4h,%1.4s"
8685 : "=w"(result)
8686 : "w"(a)
8687 : /* No clobbers */);
8688 return result;
8691 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8692 vmovn_u64 (uint64x2_t a)
8694 uint32x2_t result;
8695 __asm__ ("xtn %0.2s,%1.2d"
8696 : "=w"(result)
8697 : "w"(a)
8698 : /* No clobbers */);
8699 return result;
/* vmovq_n_* — broadcast a scalar into every lane of a 128-bit vector.
   Same DUP-from-GPR pattern as the 64-bit vmov_n_* family; the f64
   form is plain C (a brace-enclosed vector literal) and needs no asm.  */
8702 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8703 vmovq_n_f32 (float32_t a)
8705 float32x4_t result;
8706 __asm__ ("dup %0.4s, %w1"
8707 : "=w"(result)
8708 : "r"(a)
8709 : /* No clobbers */);
8710 return result;
/* Pure C: GCC vector initializer, no inline asm required.  */
8713 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8714 vmovq_n_f64 (float64_t a)
8716 return (float64x2_t) {a, a};
8719 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
8720 vmovq_n_p8 (uint32_t a)
8722 poly8x16_t result;
8723 __asm__ ("dup %0.16b,%w1"
8724 : "=w"(result)
8725 : "r"(a)
8726 : /* No clobbers */);
8727 return result;
8730 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8731 vmovq_n_p16 (uint32_t a)
8733 poly16x8_t result;
8734 __asm__ ("dup %0.8h,%w1"
8735 : "=w"(result)
8736 : "r"(a)
8737 : /* No clobbers */);
8738 return result;
/* Signed broadcasts.  */
8741 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8742 vmovq_n_s8 (int32_t a)
8744 int8x16_t result;
8745 __asm__ ("dup %0.16b,%w1"
8746 : "=w"(result)
8747 : "r"(a)
8748 : /* No clobbers */);
8749 return result;
8752 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8753 vmovq_n_s16 (int32_t a)
8755 int16x8_t result;
8756 __asm__ ("dup %0.8h,%w1"
8757 : "=w"(result)
8758 : "r"(a)
8759 : /* No clobbers */);
8760 return result;
8763 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8764 vmovq_n_s32 (int32_t a)
8766 int32x4_t result;
8767 __asm__ ("dup %0.4s,%w1"
8768 : "=w"(result)
8769 : "r"(a)
8770 : /* No clobbers */);
8771 return result;
/* 64-bit lanes read the full GPR via %x1.  */
8774 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8775 vmovq_n_s64 (int64_t a)
8777 int64x2_t result;
8778 __asm__ ("dup %0.2d,%x1"
8779 : "=w"(result)
8780 : "r"(a)
8781 : /* No clobbers */);
8782 return result;
/* Unsigned broadcasts.  */
8785 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8786 vmovq_n_u8 (uint32_t a)
8788 uint8x16_t result;
8789 __asm__ ("dup %0.16b,%w1"
8790 : "=w"(result)
8791 : "r"(a)
8792 : /* No clobbers */);
8793 return result;
8796 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8797 vmovq_n_u16 (uint32_t a)
8799 uint16x8_t result;
8800 __asm__ ("dup %0.8h,%w1"
8801 : "=w"(result)
8802 : "r"(a)
8803 : /* No clobbers */);
8804 return result;
8807 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8808 vmovq_n_u32 (uint32_t a)
8810 uint32x4_t result;
8811 __asm__ ("dup %0.4s,%w1"
8812 : "=w"(result)
8813 : "r"(a)
8814 : /* No clobbers */);
8815 return result;
8818 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8819 vmovq_n_u64 (uint64_t a)
8821 uint64x2_t result;
8822 __asm__ ("dup %0.2d,%x1"
8823 : "=w"(result)
8824 : "r"(a)
8825 : /* No clobbers */);
8826 return result;
/* vmul_n_* — multiply a 64-bit vector by a scalar: a * dup(b), via
   the by-element forms FMUL/MUL %N.t[0].  The scalar is placed in a
   SIMD register and lane 0 is used; 16-bit lanes need "x" (v0-v15),
   32-bit lanes allow any "w" register.  */
8829 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8830 vmul_n_f32 (float32x2_t a, float32_t b)
8832 float32x2_t result;
8833 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
8834 : "=w"(result)
8835 : "w"(a), "w"(b)
8836 : /* No clobbers */);
8837 return result;
8840 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8841 vmul_n_s16 (int16x4_t a, int16_t b)
8843 int16x4_t result;
8844 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8845 : "=w"(result)
8846 : "w"(a), "x"(b)
8847 : /* No clobbers */);
8848 return result;
8851 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8852 vmul_n_s32 (int32x2_t a, int32_t b)
8854 int32x2_t result;
8855 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8856 : "=w"(result)
8857 : "w"(a), "w"(b)
8858 : /* No clobbers */);
8859 return result;
8862 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8863 vmul_n_u16 (uint16x4_t a, uint16_t b)
8865 uint16x4_t result;
8866 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8867 : "=w"(result)
8868 : "w"(a), "x"(b)
8869 : /* No clobbers */);
8870 return result;
8873 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8874 vmul_n_u32 (uint32x2_t a, uint32_t b)
8876 uint32x2_t result;
8877 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8878 : "=w"(result)
8879 : "w"(a), "w"(b)
8880 : /* No clobbers */);
8881 return result;
/* vmuld_lane_f64 — scalar double times lane `c` of a float64x2_t.
   A macro (statement expression) rather than a function so that the
   lane number can be substituted as an "i" immediate into FMUL's
   by-element form.  Arguments are copied to locals first to avoid
   multiple evaluation.  */
8884 #define vmuld_lane_f64(a, b, c) \
8885 __extension__ \
8886 ({ \
8887 float64x2_t b_ = (b); \
8888 float64_t a_ = (a); \
8889 float64_t result; \
8890 __asm__ ("fmul %d0,%d1,%2.d[%3]" \
8891 : "=w"(result) \
8892 : "w"(a_), "w"(b_), "i"(c) \
8893 : /* No clobbers */); \
8894 result; \
/* vmull_high_lane{,q}_* — widening multiply of the UPPER half of `a`
   by lane `c` of `b` (SMULL2/UMULL2 by element).  Macros so the lane
   index reaches the asm as an "i" immediate; 16-bit lane operands use
   the restricted "x" class (v0-v15), 32-bit ones plain "w".
   NOTE(review): the _lane_ and _laneq_ variants here use identical
   operand types and asm; presumably the distinction (64- vs 128-bit
   lane source) was flattened in this revision — confirm upstream.  */
8897 #define vmull_high_lane_s16(a, b, c) \
8898 __extension__ \
8899 ({ \
8900 int16x8_t b_ = (b); \
8901 int16x8_t a_ = (a); \
8902 int32x4_t result; \
8903 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8904 : "=w"(result) \
8905 : "w"(a_), "x"(b_), "i"(c) \
8906 : /* No clobbers */); \
8907 result; \
8910 #define vmull_high_lane_s32(a, b, c) \
8911 __extension__ \
8912 ({ \
8913 int32x4_t b_ = (b); \
8914 int32x4_t a_ = (a); \
8915 int64x2_t result; \
8916 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8917 : "=w"(result) \
8918 : "w"(a_), "w"(b_), "i"(c) \
8919 : /* No clobbers */); \
8920 result; \
8923 #define vmull_high_lane_u16(a, b, c) \
8924 __extension__ \
8925 ({ \
8926 uint16x8_t b_ = (b); \
8927 uint16x8_t a_ = (a); \
8928 uint32x4_t result; \
8929 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8930 : "=w"(result) \
8931 : "w"(a_), "x"(b_), "i"(c) \
8932 : /* No clobbers */); \
8933 result; \
8936 #define vmull_high_lane_u32(a, b, c) \
8937 __extension__ \
8938 ({ \
8939 uint32x4_t b_ = (b); \
8940 uint32x4_t a_ = (a); \
8941 uint64x2_t result; \
8942 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8943 : "=w"(result) \
8944 : "w"(a_), "w"(b_), "i"(c) \
8945 : /* No clobbers */); \
8946 result; \
8949 #define vmull_high_laneq_s16(a, b, c) \
8950 __extension__ \
8951 ({ \
8952 int16x8_t b_ = (b); \
8953 int16x8_t a_ = (a); \
8954 int32x4_t result; \
8955 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8956 : "=w"(result) \
8957 : "w"(a_), "x"(b_), "i"(c) \
8958 : /* No clobbers */); \
8959 result; \
8962 #define vmull_high_laneq_s32(a, b, c) \
8963 __extension__ \
8964 ({ \
8965 int32x4_t b_ = (b); \
8966 int32x4_t a_ = (a); \
8967 int64x2_t result; \
8968 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8969 : "=w"(result) \
8970 : "w"(a_), "w"(b_), "i"(c) \
8971 : /* No clobbers */); \
8972 result; \
8975 #define vmull_high_laneq_u16(a, b, c) \
8976 __extension__ \
8977 ({ \
8978 uint16x8_t b_ = (b); \
8979 uint16x8_t a_ = (a); \
8980 uint32x4_t result; \
8981 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8982 : "=w"(result) \
8983 : "w"(a_), "x"(b_), "i"(c) \
8984 : /* No clobbers */); \
8985 result; \
8988 #define vmull_high_laneq_u32(a, b, c) \
8989 __extension__ \
8990 ({ \
8991 uint32x4_t b_ = (b); \
8992 uint32x4_t a_ = (a); \
8993 uint64x2_t result; \
8994 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8995 : "=w"(result) \
8996 : "w"(a_), "w"(b_), "i"(c) \
8997 : /* No clobbers */); \
8998 result; \
/* vmull_high_n_* — widening multiply of the upper half of `a` by a
   broadcast scalar (SMULL2/UMULL2 by element, lane 0); 16-bit scalars
   need the "x" class.  vmull_high_* below multiply upper halves of
   two full vectors (PMULL2/SMULL2/UMULL2, vector form).  */
9001 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9002 vmull_high_n_s16 (int16x8_t a, int16_t b)
9004 int32x4_t result;
9005 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
9006 : "=w"(result)
9007 : "w"(a), "x"(b)
9008 : /* No clobbers */);
9009 return result;
9012 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9013 vmull_high_n_s32 (int32x4_t a, int32_t b)
9015 int64x2_t result;
9016 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
9017 : "=w"(result)
9018 : "w"(a), "w"(b)
9019 : /* No clobbers */);
9020 return result;
9023 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9024 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
9026 uint32x4_t result;
9027 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
9028 : "=w"(result)
9029 : "w"(a), "x"(b)
9030 : /* No clobbers */);
9031 return result;
9034 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9035 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
9037 uint64x2_t result;
9038 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
9039 : "=w"(result)
9040 : "w"(a), "w"(b)
9041 : /* No clobbers */);
9042 return result;
/* Polynomial multiply of upper halves (carry-less, PMULL2).  */
9045 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9046 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
9048 poly16x8_t result;
9049 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
9050 : "=w"(result)
9051 : "w"(a), "w"(b)
9052 : /* No clobbers */);
9053 return result;
9056 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9057 vmull_high_s8 (int8x16_t a, int8x16_t b)
9059 int16x8_t result;
9060 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
9061 : "=w"(result)
9062 : "w"(a), "w"(b)
9063 : /* No clobbers */);
9064 return result;
9067 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9068 vmull_high_s16 (int16x8_t a, int16x8_t b)
9070 int32x4_t result;
9071 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
9072 : "=w"(result)
9073 : "w"(a), "w"(b)
9074 : /* No clobbers */);
9075 return result;
9078 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9079 vmull_high_s32 (int32x4_t a, int32x4_t b)
9081 int64x2_t result;
9082 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
9083 : "=w"(result)
9084 : "w"(a), "w"(b)
9085 : /* No clobbers */);
9086 return result;
9089 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9090 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
9092 uint16x8_t result;
9093 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
9094 : "=w"(result)
9095 : "w"(a), "w"(b)
9096 : /* No clobbers */);
9097 return result;
9100 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9101 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
9103 uint32x4_t result;
9104 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
9105 : "=w"(result)
9106 : "w"(a), "w"(b)
9107 : /* No clobbers */);
9108 return result;
9111 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9112 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
9114 uint64x2_t result;
9115 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
9116 : "=w"(result)
9117 : "w"(a), "w"(b)
9118 : /* No clobbers */);
9119 return result;
/* vmull_lane{,q}_* — widening multiply of a 64-bit vector by lane `c`
   of `b` (SMULL/UMULL by element).  Macros so the lane index is an
   "i" immediate; the _laneq_ forms take a 128-bit `b` (wider lane
   range), otherwise identical.  16-bit lane operands use "x".  */
9122 #define vmull_lane_s16(a, b, c) \
9123 __extension__ \
9124 ({ \
9125 int16x4_t b_ = (b); \
9126 int16x4_t a_ = (a); \
9127 int32x4_t result; \
9128 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
9129 : "=w"(result) \
9130 : "w"(a_), "x"(b_), "i"(c) \
9131 : /* No clobbers */); \
9132 result; \
9135 #define vmull_lane_s32(a, b, c) \
9136 __extension__ \
9137 ({ \
9138 int32x2_t b_ = (b); \
9139 int32x2_t a_ = (a); \
9140 int64x2_t result; \
9141 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
9142 : "=w"(result) \
9143 : "w"(a_), "w"(b_), "i"(c) \
9144 : /* No clobbers */); \
9145 result; \
9148 #define vmull_lane_u16(a, b, c) \
9149 __extension__ \
9150 ({ \
9151 uint16x4_t b_ = (b); \
9152 uint16x4_t a_ = (a); \
9153 uint32x4_t result; \
9154 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
9155 : "=w"(result) \
9156 : "w"(a_), "x"(b_), "i"(c) \
9157 : /* No clobbers */); \
9158 result; \
9161 #define vmull_lane_u32(a, b, c) \
9162 __extension__ \
9163 ({ \
9164 uint32x2_t b_ = (b); \
9165 uint32x2_t a_ = (a); \
9166 uint64x2_t result; \
9167 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
9168 : "=w"(result) \
9169 : "w"(a_), "w"(b_), "i"(c) \
9170 : /* No clobbers */); \
9171 result; \
9174 #define vmull_laneq_s16(a, b, c) \
9175 __extension__ \
9176 ({ \
9177 int16x8_t b_ = (b); \
9178 int16x4_t a_ = (a); \
9179 int32x4_t result; \
9180 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
9181 : "=w"(result) \
9182 : "w"(a_), "x"(b_), "i"(c) \
9183 : /* No clobbers */); \
9184 result; \
9187 #define vmull_laneq_s32(a, b, c) \
9188 __extension__ \
9189 ({ \
9190 int32x4_t b_ = (b); \
9191 int32x2_t a_ = (a); \
9192 int64x2_t result; \
9193 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
9194 : "=w"(result) \
9195 : "w"(a_), "w"(b_), "i"(c) \
9196 : /* No clobbers */); \
9197 result; \
9200 #define vmull_laneq_u16(a, b, c) \
9201 __extension__ \
9202 ({ \
9203 uint16x8_t b_ = (b); \
9204 uint16x4_t a_ = (a); \
9205 uint32x4_t result; \
9206 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
9207 : "=w"(result) \
9208 : "w"(a_), "x"(b_), "i"(c) \
9209 : /* No clobbers */); \
9210 result; \
9213 #define vmull_laneq_u32(a, b, c) \
9214 __extension__ \
9215 ({ \
9216 uint32x4_t b_ = (b); \
9217 uint32x2_t a_ = (a); \
9218 uint64x2_t result; \
9219 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
9220 : "=w"(result) \
9221 : "w"(a_), "w"(b_), "i"(c) \
9222 : /* No clobbers */); \
9223 result; \
/* vmull_n_* — widening multiply by a broadcast scalar (by-element
   SMULL/UMULL with lane 0); vmull_* below multiply two 64-bit
   vectors element-wise into a double-width result (PMULL for
   polynomial, SMULL/UMULL otherwise).  */
9226 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9227 vmull_n_s16 (int16x4_t a, int16_t b)
9229 int32x4_t result;
9230 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
9231 : "=w"(result)
9232 : "w"(a), "x"(b)
9233 : /* No clobbers */);
9234 return result;
9237 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9238 vmull_n_s32 (int32x2_t a, int32_t b)
9240 int64x2_t result;
9241 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
9242 : "=w"(result)
9243 : "w"(a), "w"(b)
9244 : /* No clobbers */);
9245 return result;
9248 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9249 vmull_n_u16 (uint16x4_t a, uint16_t b)
9251 uint32x4_t result;
9252 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
9253 : "=w"(result)
9254 : "w"(a), "x"(b)
9255 : /* No clobbers */);
9256 return result;
9259 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9260 vmull_n_u32 (uint32x2_t a, uint32_t b)
9262 uint64x2_t result;
9263 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
9264 : "=w"(result)
9265 : "w"(a), "w"(b)
9266 : /* No clobbers */);
9267 return result;
/* Polynomial (carry-less) 8x8 -> 16 multiply.  */
9270 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9271 vmull_p8 (poly8x8_t a, poly8x8_t b)
9273 poly16x8_t result;
9274 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
9275 : "=w"(result)
9276 : "w"(a), "w"(b)
9277 : /* No clobbers */);
9278 return result;
9281 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9282 vmull_s8 (int8x8_t a, int8x8_t b)
9284 int16x8_t result;
9285 __asm__ ("smull %0.8h, %1.8b, %2.8b"
9286 : "=w"(result)
9287 : "w"(a), "w"(b)
9288 : /* No clobbers */);
9289 return result;
9292 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9293 vmull_s16 (int16x4_t a, int16x4_t b)
9295 int32x4_t result;
9296 __asm__ ("smull %0.4s, %1.4h, %2.4h"
9297 : "=w"(result)
9298 : "w"(a), "w"(b)
9299 : /* No clobbers */);
9300 return result;
9303 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9304 vmull_s32 (int32x2_t a, int32x2_t b)
9306 int64x2_t result;
9307 __asm__ ("smull %0.2d, %1.2s, %2.2s"
9308 : "=w"(result)
9309 : "w"(a), "w"(b)
9310 : /* No clobbers */);
9311 return result;
9314 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9315 vmull_u8 (uint8x8_t a, uint8x8_t b)
9317 uint16x8_t result;
9318 __asm__ ("umull %0.8h, %1.8b, %2.8b"
9319 : "=w"(result)
9320 : "w"(a), "w"(b)
9321 : /* No clobbers */);
9322 return result;
9325 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9326 vmull_u16 (uint16x4_t a, uint16x4_t b)
9328 uint32x4_t result;
9329 __asm__ ("umull %0.4s, %1.4h, %2.4h"
9330 : "=w"(result)
9331 : "w"(a), "w"(b)
9332 : /* No clobbers */);
9333 return result;
9336 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9337 vmull_u32 (uint32x2_t a, uint32x2_t b)
9339 uint64x2_t result;
9340 __asm__ ("umull %0.2d, %1.2s, %2.2s"
9341 : "=w"(result)
9342 : "w"(a), "w"(b)
9343 : /* No clobbers */);
9344 return result;
/* vmulq_n_* — multiply a 128-bit vector by a broadcast scalar using
   the by-element FMUL/MUL forms (lane 0 of the scalar's register);
   16-bit scalars constrained to "x" (v0-v15).  */
9347 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9348 vmulq_n_f32 (float32x4_t a, float32_t b)
9350 float32x4_t result;
9351 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
9352 : "=w"(result)
9353 : "w"(a), "w"(b)
9354 : /* No clobbers */);
9355 return result;
9358 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9359 vmulq_n_f64 (float64x2_t a, float64_t b)
9361 float64x2_t result;
9362 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
9363 : "=w"(result)
9364 : "w"(a), "w"(b)
9365 : /* No clobbers */);
9366 return result;
9369 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9370 vmulq_n_s16 (int16x8_t a, int16_t b)
9372 int16x8_t result;
9373 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
9374 : "=w"(result)
9375 : "w"(a), "x"(b)
9376 : /* No clobbers */);
9377 return result;
9380 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9381 vmulq_n_s32 (int32x4_t a, int32_t b)
9383 int32x4_t result;
9384 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
9385 : "=w"(result)
9386 : "w"(a), "w"(b)
9387 : /* No clobbers */);
9388 return result;
9391 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9392 vmulq_n_u16 (uint16x8_t a, uint16_t b)
9394 uint16x8_t result;
9395 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
9396 : "=w"(result)
9397 : "w"(a), "x"(b)
9398 : /* No clobbers */);
9399 return result;
9402 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9403 vmulq_n_u32 (uint32x4_t a, uint32_t b)
9405 uint32x4_t result;
9406 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
9407 : "=w"(result)
9408 : "w"(a), "w"(b)
9409 : /* No clobbers */);
9410 return result;
/* vmuls_lane_f32 — scalar float times lane `c` of a float32x4_t;
   macro so the lane index is an "i" immediate for FMUL's scalar
   by-element form (%s0 = scalar S-register view of the output).  */
9413 #define vmuls_lane_f32(a, b, c) \
9414 __extension__ \
9415 ({ \
9416 float32x4_t b_ = (b); \
9417 float32_t a_ = (a); \
9418 float32_t result; \
9419 __asm__ ("fmul %s0,%s1,%2.s[%3]" \
9420 : "=w"(result) \
9421 : "w"(a_), "w"(b_), "i"(c) \
9422 : /* No clobbers */); \
9423 result; \
/* vmulx family — FMULX: floating multiply extended (differs from FMUL
   only in its handling of the 0 * infinity special case).  Vector,
   by-lane (macro, "i" immediate) and scalar (%d/%s register view)
   forms below.  */
9426 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9427 vmulx_f32 (float32x2_t a, float32x2_t b)
9429 float32x2_t result;
9430 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
9431 : "=w"(result)
9432 : "w"(a), "w"(b)
9433 : /* No clobbers */);
9434 return result;
/* 64-bit vector by lane of a 128-bit vector.  */
9437 #define vmulx_lane_f32(a, b, c) \
9438 __extension__ \
9439 ({ \
9440 float32x4_t b_ = (b); \
9441 float32x2_t a_ = (a); \
9442 float32x2_t result; \
9443 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
9444 : "=w"(result) \
9445 : "w"(a_), "w"(b_), "i"(c) \
9446 : /* No clobbers */); \
9447 result; \
/* Scalar double FMULX.  */
9450 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9451 vmulxd_f64 (float64_t a, float64_t b)
9453 float64_t result;
9454 __asm__ ("fmulx %d0, %d1, %d2"
9455 : "=w"(result)
9456 : "w"(a), "w"(b)
9457 : /* No clobbers */);
9458 return result;
9461 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9462 vmulxq_f32 (float32x4_t a, float32x4_t b)
9464 float32x4_t result;
9465 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
9466 : "=w"(result)
9467 : "w"(a), "w"(b)
9468 : /* No clobbers */);
9469 return result;
9472 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9473 vmulxq_f64 (float64x2_t a, float64x2_t b)
9475 float64x2_t result;
9476 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
9477 : "=w"(result)
9478 : "w"(a), "w"(b)
9479 : /* No clobbers */);
9480 return result;
9483 #define vmulxq_lane_f32(a, b, c) \
9484 __extension__ \
9485 ({ \
9486 float32x4_t b_ = (b); \
9487 float32x4_t a_ = (a); \
9488 float32x4_t result; \
9489 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
9490 : "=w"(result) \
9491 : "w"(a_), "w"(b_), "i"(c) \
9492 : /* No clobbers */); \
9493 result; \
9496 #define vmulxq_lane_f64(a, b, c) \
9497 __extension__ \
9498 ({ \
9499 float64x2_t b_ = (b); \
9500 float64x2_t a_ = (a); \
9501 float64x2_t result; \
9502 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
9503 : "=w"(result) \
9504 : "w"(a_), "w"(b_), "i"(c) \
9505 : /* No clobbers */); \
9506 result; \
/* Scalar single FMULX.  */
9509 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9510 vmulxs_f32 (float32_t a, float32_t b)
9512 float32_t result;
9513 __asm__ ("fmulx %s0, %s1, %s2"
9514 : "=w"(result)
9515 : "w"(a), "w"(b)
9516 : /* No clobbers */);
9517 return result;
/* vmvn family (64-bit vectors): bitwise NOT of every bit of the input
   (MVN).  MVN is a pure bitwise operation, so the ".8b" arrangement is
   used regardless of the nominal element size.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmvn_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmvn_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmvn_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmvn_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmvn_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmvn_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vmvnq family (128-bit vectors): bitwise NOT of every bit of the input
   (MVN on the ".16b" arrangement; element size is irrelevant to a
   bitwise NOT).  */

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmvnq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmvnq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmvnq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmvnq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmvnq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmvnq_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmvnq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpadal family (64-bit vectors): pairwise add-long and accumulate
   (SADALP/UADALP).  Adjacent element pairs of B are added at double
   width and accumulated into A; the "0"(a) constraint ties A to the
   output register so the instruction's read-modify-write of %0 sees A.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8 (int16x4_t a, int8x8_t b)
{
  int16x4_t result;
  __asm__ ("sadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadal_s16 (int32x2_t a, int16x4_t b)
{
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpadalq family (128-bit vectors): pairwise add-long and accumulate
   (SADALP/UADALP); A is tied to the output register via "0"(a).  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpadd_f32: floating-point pairwise add (FADDP) — adjacent pairs drawn
   from the concatenation of A and B are summed.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("faddp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpadd integer family (64-bit vectors): pairwise add via the ADDP
   builtin.  The unsigned variants reuse the signed builtin through
   casts, which is valid because ADDP is sign-agnostic.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_addpv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_addpv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_addpv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}
/* vpaddd_f64: scalar reduction — sum of the two lanes of A (FADDP).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("faddp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddl family (64-bit vectors): pairwise add-long (SADDLP/UADDLP) —
   adjacent element pairs are summed into double-width results.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddlq family (128-bit vectors): pairwise add-long
   (SADDLP/UADDLP).  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddq family (128-bit vectors): pairwise add (FADDP/ADDP) — adjacent
   pairs drawn from the concatenation of A and B are summed.  ADDP is
   sign-agnostic, so the signed and unsigned variants share mnemonics.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("faddp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("faddp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddq_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpadds_f32: scalar reduction — sum of the two lanes of A (FADDP).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("faddp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpmax family (64-bit vectors): pairwise maximum
   (FMAXP/SMAXP/UMAXP) — each result element is the max of an adjacent
   pair drawn from the concatenation of A and B.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmax_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmax_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmax_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmax_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpmaxnm family: pairwise maxNum (FMAXNMP) — like FMAXP but follows
   IEEE 754-2008 maxNum NaN handling.  The *qd/*s forms are scalar
   reductions of a single vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpmaxq family (128-bit vectors): pairwise maximum
   (FMAXP/SMAXP/UMAXP), plus the scalar reductions vpmaxqd_f64 and
   vpmaxs_f32 which take the max of the lanes of a single vector.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpmaxq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpmaxq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpmaxq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxs_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpmin family (64-bit vectors): pairwise minimum
   (FMINP/SMINP/UMINP).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpminnm family: pairwise minNum (FMINNMP) — like FMINP but follows
   IEEE 754-2008 minNum NaN handling.  The *qd/*s forms are scalar
   reductions of a single vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpminq family (128-bit vectors): pairwise minimum
   (FMINP/SMINP/UMINP), plus the scalar reductions vpminqd_f64 and
   vpmins_f32 which take the min of the lanes of a single vector.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpminq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpminq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpminq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpminq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
10605 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10606 vqdmulh_n_s16 (int16x4_t a, int16_t b)
10608 int16x4_t result;
10609 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
10610 : "=w"(result)
10611 : "w"(a), "w"(b)
10612 : /* No clobbers */);
10613 return result;
/* vqdmulh_n_s32: saturating doubling multiply-high of each lane of A by
   the scalar B (SQDMULH by element).  "s"-lane by-element forms can use
   any of V0-V31, so "w" is the correct constraint here.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
10627 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10628 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
10630 int16x8_t result;
10631 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
10632 : "=w"(result)
10633 : "w"(a), "w"(b)
10634 : /* No clobbers */);
10635 return result;
/* vqdmulhq_n_s32: saturating doubling multiply-high of each lane of A by
   the scalar B (SQDMULH by element); "s" lanes may use any V register.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vqmovn_high / vqmovun_high family: saturating narrow of B into the
   high half of the result (SQXTN2/UQXTN2/SQXTUN2).  The low half is
   seeded with A via vcombine (the upper half is zero-filled only as a
   placeholder); the "+w" read-write constraint lets the *2 instruction
   insert into the upper half while preserving the lower.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* Signed-to-unsigned saturating narrow into the high half (SQXTUN2).  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* vqrdmulh_n family: saturating rounding doubling multiply-high of each
   lane of A by the scalar B (SQRDMULH by element).  The s16 variants use
   the "x" constraint because "h"-lane by-element forms can only encode
   vector registers V0-V15; "s"-lane forms may use any V register ("w").  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Saturating rounding shift-right-narrow of B by immediate C into the high
   half of the result; the low half comes from A.  Macros (not functions)
   because C must be a compile-time immediate ("i" constraint).  */
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Signed-to-unsigned saturating rounding shift-right-narrow of B by
   immediate C into the high half; low half from A.  */
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Saturating (non-rounding) shift-right-narrow of B by immediate C into
   the high half; low half from A.  */
#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Signed-to-unsigned saturating shift-right-narrow of B by immediate C
   into the high half; low half from A.  */
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
11062 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11063 vrbit_s8 (int8x8_t a)
11065 int8x8_t result;
11066 __asm__ ("rbit %0.8b,%1.8b"
11067 : "=w"(result)
11068 : "w"(a)
11069 : /* No clobbers */);
11070 return result;
11073 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11074 vrbit_u8 (uint8x8_t a)
11076 uint8x8_t result;
11077 __asm__ ("rbit %0.8b,%1.8b"
11078 : "=w"(result)
11079 : "w"(a)
11080 : /* No clobbers */);
11081 return result;
11084 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11085 vrbitq_s8 (int8x16_t a)
11087 int8x16_t result;
11088 __asm__ ("rbit %0.16b,%1.16b"
11089 : "=w"(result)
11090 : "w"(a)
11091 : /* No clobbers */);
11092 return result;
11095 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11096 vrbitq_u8 (uint8x16_t a)
11098 uint8x16_t result;
11099 __asm__ ("rbit %0.16b,%1.16b"
11100 : "=w"(result)
11101 : "w"(a)
11102 : /* No clobbers */);
11103 return result;
11106 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11107 vrecpe_u32 (uint32x2_t a)
11109 uint32x2_t result;
11110 __asm__ ("urecpe %0.2s,%1.2s"
11111 : "=w"(result)
11112 : "w"(a)
11113 : /* No clobbers */);
11114 return result;
11117 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11118 vrecpeq_u32 (uint32x4_t a)
11120 uint32x4_t result;
11121 __asm__ ("urecpe %0.4s,%1.4s"
11122 : "=w"(result)
11123 : "w"(a)
11124 : /* No clobbers */);
11125 return result;
11128 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11129 vrev16_p8 (poly8x8_t a)
11131 poly8x8_t result;
11132 __asm__ ("rev16 %0.8b,%1.8b"
11133 : "=w"(result)
11134 : "w"(a)
11135 : /* No clobbers */);
11136 return result;
11139 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11140 vrev16_s8 (int8x8_t a)
11142 int8x8_t result;
11143 __asm__ ("rev16 %0.8b,%1.8b"
11144 : "=w"(result)
11145 : "w"(a)
11146 : /* No clobbers */);
11147 return result;
11150 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11151 vrev16_u8 (uint8x8_t a)
11153 uint8x8_t result;
11154 __asm__ ("rev16 %0.8b,%1.8b"
11155 : "=w"(result)
11156 : "w"(a)
11157 : /* No clobbers */);
11158 return result;
11161 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11162 vrev16q_p8 (poly8x16_t a)
11164 poly8x16_t result;
11165 __asm__ ("rev16 %0.16b,%1.16b"
11166 : "=w"(result)
11167 : "w"(a)
11168 : /* No clobbers */);
11169 return result;
11172 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11173 vrev16q_s8 (int8x16_t a)
11175 int8x16_t result;
11176 __asm__ ("rev16 %0.16b,%1.16b"
11177 : "=w"(result)
11178 : "w"(a)
11179 : /* No clobbers */);
11180 return result;
11183 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11184 vrev16q_u8 (uint8x16_t a)
11186 uint8x16_t result;
11187 __asm__ ("rev16 %0.16b,%1.16b"
11188 : "=w"(result)
11189 : "w"(a)
11190 : /* No clobbers */);
11191 return result;
11194 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11195 vrev32_p8 (poly8x8_t a)
11197 poly8x8_t result;
11198 __asm__ ("rev32 %0.8b,%1.8b"
11199 : "=w"(result)
11200 : "w"(a)
11201 : /* No clobbers */);
11202 return result;
11205 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
11206 vrev32_p16 (poly16x4_t a)
11208 poly16x4_t result;
11209 __asm__ ("rev32 %0.4h,%1.4h"
11210 : "=w"(result)
11211 : "w"(a)
11212 : /* No clobbers */);
11213 return result;
11216 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11217 vrev32_s8 (int8x8_t a)
11219 int8x8_t result;
11220 __asm__ ("rev32 %0.8b,%1.8b"
11221 : "=w"(result)
11222 : "w"(a)
11223 : /* No clobbers */);
11224 return result;
11227 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11228 vrev32_s16 (int16x4_t a)
11230 int16x4_t result;
11231 __asm__ ("rev32 %0.4h,%1.4h"
11232 : "=w"(result)
11233 : "w"(a)
11234 : /* No clobbers */);
11235 return result;
11238 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11239 vrev32_u8 (uint8x8_t a)
11241 uint8x8_t result;
11242 __asm__ ("rev32 %0.8b,%1.8b"
11243 : "=w"(result)
11244 : "w"(a)
11245 : /* No clobbers */);
11246 return result;
11249 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11250 vrev32_u16 (uint16x4_t a)
11252 uint16x4_t result;
11253 __asm__ ("rev32 %0.4h,%1.4h"
11254 : "=w"(result)
11255 : "w"(a)
11256 : /* No clobbers */);
11257 return result;
11260 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11261 vrev32q_p8 (poly8x16_t a)
11263 poly8x16_t result;
11264 __asm__ ("rev32 %0.16b,%1.16b"
11265 : "=w"(result)
11266 : "w"(a)
11267 : /* No clobbers */);
11268 return result;
11271 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
11272 vrev32q_p16 (poly16x8_t a)
11274 poly16x8_t result;
11275 __asm__ ("rev32 %0.8h,%1.8h"
11276 : "=w"(result)
11277 : "w"(a)
11278 : /* No clobbers */);
11279 return result;
11282 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11283 vrev32q_s8 (int8x16_t a)
11285 int8x16_t result;
11286 __asm__ ("rev32 %0.16b,%1.16b"
11287 : "=w"(result)
11288 : "w"(a)
11289 : /* No clobbers */);
11290 return result;
11293 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11294 vrev32q_s16 (int16x8_t a)
11296 int16x8_t result;
11297 __asm__ ("rev32 %0.8h,%1.8h"
11298 : "=w"(result)
11299 : "w"(a)
11300 : /* No clobbers */);
11301 return result;
11304 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11305 vrev32q_u8 (uint8x16_t a)
11307 uint8x16_t result;
11308 __asm__ ("rev32 %0.16b,%1.16b"
11309 : "=w"(result)
11310 : "w"(a)
11311 : /* No clobbers */);
11312 return result;
11315 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11316 vrev32q_u16 (uint16x8_t a)
11318 uint16x8_t result;
11319 __asm__ ("rev32 %0.8h,%1.8h"
11320 : "=w"(result)
11321 : "w"(a)
11322 : /* No clobbers */);
11323 return result;
11326 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11327 vrev64_f32 (float32x2_t a)
11329 float32x2_t result;
11330 __asm__ ("rev64 %0.2s,%1.2s"
11331 : "=w"(result)
11332 : "w"(a)
11333 : /* No clobbers */);
11334 return result;
11337 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11338 vrev64_p8 (poly8x8_t a)
11340 poly8x8_t result;
11341 __asm__ ("rev64 %0.8b,%1.8b"
11342 : "=w"(result)
11343 : "w"(a)
11344 : /* No clobbers */);
11345 return result;
11348 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
11349 vrev64_p16 (poly16x4_t a)
11351 poly16x4_t result;
11352 __asm__ ("rev64 %0.4h,%1.4h"
11353 : "=w"(result)
11354 : "w"(a)
11355 : /* No clobbers */);
11356 return result;
11359 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11360 vrev64_s8 (int8x8_t a)
11362 int8x8_t result;
11363 __asm__ ("rev64 %0.8b,%1.8b"
11364 : "=w"(result)
11365 : "w"(a)
11366 : /* No clobbers */);
11367 return result;
11370 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11371 vrev64_s16 (int16x4_t a)
11373 int16x4_t result;
11374 __asm__ ("rev64 %0.4h,%1.4h"
11375 : "=w"(result)
11376 : "w"(a)
11377 : /* No clobbers */);
11378 return result;
11381 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11382 vrev64_s32 (int32x2_t a)
11384 int32x2_t result;
11385 __asm__ ("rev64 %0.2s,%1.2s"
11386 : "=w"(result)
11387 : "w"(a)
11388 : /* No clobbers */);
11389 return result;
11392 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11393 vrev64_u8 (uint8x8_t a)
11395 uint8x8_t result;
11396 __asm__ ("rev64 %0.8b,%1.8b"
11397 : "=w"(result)
11398 : "w"(a)
11399 : /* No clobbers */);
11400 return result;
11403 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11404 vrev64_u16 (uint16x4_t a)
11406 uint16x4_t result;
11407 __asm__ ("rev64 %0.4h,%1.4h"
11408 : "=w"(result)
11409 : "w"(a)
11410 : /* No clobbers */);
11411 return result;
11414 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11415 vrev64_u32 (uint32x2_t a)
11417 uint32x2_t result;
11418 __asm__ ("rev64 %0.2s,%1.2s"
11419 : "=w"(result)
11420 : "w"(a)
11421 : /* No clobbers */);
11422 return result;
11425 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11426 vrev64q_f32 (float32x4_t a)
11428 float32x4_t result;
11429 __asm__ ("rev64 %0.4s,%1.4s"
11430 : "=w"(result)
11431 : "w"(a)
11432 : /* No clobbers */);
11433 return result;
11436 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11437 vrev64q_p8 (poly8x16_t a)
11439 poly8x16_t result;
11440 __asm__ ("rev64 %0.16b,%1.16b"
11441 : "=w"(result)
11442 : "w"(a)
11443 : /* No clobbers */);
11444 return result;
11447 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
11448 vrev64q_p16 (poly16x8_t a)
11450 poly16x8_t result;
11451 __asm__ ("rev64 %0.8h,%1.8h"
11452 : "=w"(result)
11453 : "w"(a)
11454 : /* No clobbers */);
11455 return result;
11458 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11459 vrev64q_s8 (int8x16_t a)
11461 int8x16_t result;
11462 __asm__ ("rev64 %0.16b,%1.16b"
11463 : "=w"(result)
11464 : "w"(a)
11465 : /* No clobbers */);
11466 return result;
11469 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11470 vrev64q_s16 (int16x8_t a)
11472 int16x8_t result;
11473 __asm__ ("rev64 %0.8h,%1.8h"
11474 : "=w"(result)
11475 : "w"(a)
11476 : /* No clobbers */);
11477 return result;
11480 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11481 vrev64q_s32 (int32x4_t a)
11483 int32x4_t result;
11484 __asm__ ("rev64 %0.4s,%1.4s"
11485 : "=w"(result)
11486 : "w"(a)
11487 : /* No clobbers */);
11488 return result;
11491 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11492 vrev64q_u8 (uint8x16_t a)
11494 uint8x16_t result;
11495 __asm__ ("rev64 %0.16b,%1.16b"
11496 : "=w"(result)
11497 : "w"(a)
11498 : /* No clobbers */);
11499 return result;
11502 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11503 vrev64q_u16 (uint16x8_t a)
11505 uint16x8_t result;
11506 __asm__ ("rev64 %0.8h,%1.8h"
11507 : "=w"(result)
11508 : "w"(a)
11509 : /* No clobbers */);
11510 return result;
11513 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11514 vrev64q_u32 (uint32x4_t a)
11516 uint32x4_t result;
11517 __asm__ ("rev64 %0.4s,%1.4s"
11518 : "=w"(result)
11519 : "w"(a)
11520 : /* No clobbers */);
11521 return result;
/* Rounding shift-right-narrow of B by immediate C into the high half;
   low half from A.  */
#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Rounding shift-right-narrow of A by immediate B (low half only).  */
#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
11686 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11687 vrsqrte_f32 (float32x2_t a)
11689 float32x2_t result;
11690 __asm__ ("frsqrte %0.2s,%1.2s"
11691 : "=w"(result)
11692 : "w"(a)
11693 : /* No clobbers */);
11694 return result;
11697 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
11698 vrsqrte_f64 (float64x1_t a)
11700 float64x1_t result;
11701 __asm__ ("frsqrte %d0,%d1"
11702 : "=w"(result)
11703 : "w"(a)
11704 : /* No clobbers */);
11705 return result;
11708 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11709 vrsqrte_u32 (uint32x2_t a)
11711 uint32x2_t result;
11712 __asm__ ("ursqrte %0.2s,%1.2s"
11713 : "=w"(result)
11714 : "w"(a)
11715 : /* No clobbers */);
11716 return result;
11719 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11720 vrsqrted_f64 (float64_t a)
11722 float64_t result;
11723 __asm__ ("frsqrte %d0,%d1"
11724 : "=w"(result)
11725 : "w"(a)
11726 : /* No clobbers */);
11727 return result;
11730 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11731 vrsqrteq_f32 (float32x4_t a)
11733 float32x4_t result;
11734 __asm__ ("frsqrte %0.4s,%1.4s"
11735 : "=w"(result)
11736 : "w"(a)
11737 : /* No clobbers */);
11738 return result;
11741 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11742 vrsqrteq_f64 (float64x2_t a)
11744 float64x2_t result;
11745 __asm__ ("frsqrte %0.2d,%1.2d"
11746 : "=w"(result)
11747 : "w"(a)
11748 : /* No clobbers */);
11749 return result;
11752 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11753 vrsqrteq_u32 (uint32x4_t a)
11755 uint32x4_t result;
11756 __asm__ ("ursqrte %0.4s,%1.4s"
11757 : "=w"(result)
11758 : "w"(a)
11759 : /* No clobbers */);
11760 return result;
11763 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11764 vrsqrtes_f32 (float32_t a)
11766 float32_t result;
11767 __asm__ ("frsqrte %s0,%s1"
11768 : "=w"(result)
11769 : "w"(a)
11770 : /* No clobbers */);
11771 return result;
11774 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11775 vrsqrts_f32 (float32x2_t a, float32x2_t b)
11777 float32x2_t result;
11778 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
11779 : "=w"(result)
11780 : "w"(a), "w"(b)
11781 : /* No clobbers */);
11782 return result;
11785 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11786 vrsqrtsd_f64 (float64_t a, float64_t b)
11788 float64_t result;
11789 __asm__ ("frsqrts %d0,%d1,%d2"
11790 : "=w"(result)
11791 : "w"(a), "w"(b)
11792 : /* No clobbers */);
11793 return result;
11796 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11797 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
11799 float32x4_t result;
11800 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
11801 : "=w"(result)
11802 : "w"(a), "w"(b)
11803 : /* No clobbers */);
11804 return result;
11807 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11808 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
11810 float64x2_t result;
11811 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11812 : "=w"(result)
11813 : "w"(a), "w"(b)
11814 : /* No clobbers */);
11815 return result;
11818 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11819 vrsqrtss_f32 (float32_t a, float32_t b)
11821 float32_t result;
11822 __asm__ ("frsqrts %s0,%s1,%s2"
11823 : "=w"(result)
11824 : "w"(a), "w"(b)
11825 : /* No clobbers */);
11826 return result;
11829 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11830 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
11832 float64x2_t result;
11833 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11834 : "=w"(result)
11835 : "w"(a), "w"(b)
11836 : /* No clobbers */);
11837 return result;
11840 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11841 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11843 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11844 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11845 : "+w"(result)
11846 : "w"(b), "w"(c)
11847 : /* No clobbers */);
11848 return result;
11851 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11852 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11854 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11855 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11856 : "+w"(result)
11857 : "w"(b), "w"(c)
11858 : /* No clobbers */);
11859 return result;
11862 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11863 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11865 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11866 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11867 : "+w"(result)
11868 : "w"(b), "w"(c)
11869 : /* No clobbers */);
11870 return result;
11873 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11874 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11876 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11877 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11878 : "+w"(result)
11879 : "w"(b), "w"(c)
11880 : /* No clobbers */);
11881 return result;
11884 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11885 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11887 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11888 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11889 : "+w"(result)
11890 : "w"(b), "w"(c)
11891 : /* No clobbers */);
11892 return result;
11895 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11896 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11898 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11899 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11900 : "+w"(result)
11901 : "w"(b), "w"(c)
11902 : /* No clobbers */);
11903 return result;
11906 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11907 vrsubhn_s16 (int16x8_t a, int16x8_t b)
11909 int8x8_t result;
11910 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11911 : "=w"(result)
11912 : "w"(a), "w"(b)
11913 : /* No clobbers */);
11914 return result;
11917 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11918 vrsubhn_s32 (int32x4_t a, int32x4_t b)
11920 int16x4_t result;
11921 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11922 : "=w"(result)
11923 : "w"(a), "w"(b)
11924 : /* No clobbers */);
11925 return result;
11928 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11929 vrsubhn_s64 (int64x2_t a, int64x2_t b)
11931 int32x2_t result;
11932 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11933 : "=w"(result)
11934 : "w"(a), "w"(b)
11935 : /* No clobbers */);
11936 return result;
11939 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11940 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
11942 uint8x8_t result;
11943 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11944 : "=w"(result)
11945 : "w"(a), "w"(b)
11946 : /* No clobbers */);
11947 return result;
11950 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11951 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
11953 uint16x4_t result;
11954 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11955 : "=w"(result)
11956 : "w"(a), "w"(b)
11957 : /* No clobbers */);
11958 return result;
11961 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11962 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
11964 uint32x2_t result;
11965 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11966 : "=w"(result)
11967 : "w"(a), "w"(b)
11968 : /* No clobbers */);
11969 return result;
/* Insert scalar A into lane C of vector B (INS from a general register;
   the "r" constraint forces the scalar through a GPR, "0" ties the
   vector input to the output so untouched lanes are preserved).  Macros
   because the lane index C must be a compile-time immediate.  */
#define vset_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x1_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t a_ = (a);                                                \
       poly8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t a_ = (a);                                                 \
       int8x8_t result;                                                 \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x4_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x2_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x1_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t a_ = (a);                                                \
       uint8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x2_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x1_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
12128 #define vsetq_lane_f32(a, b, c) \
12129 __extension__ \
12130 ({ \
12131 float32x4_t b_ = (b); \
12132 float32_t a_ = (a); \
12133 float32x4_t result; \
12134 __asm__ ("ins %0.s[%3], %w1" \
12135 : "=w"(result) \
12136 : "r"(a_), "0"(b_), "i"(c) \
12137 : /* No clobbers */); \
12138 result; \
12141 #define vsetq_lane_f64(a, b, c) \
12142 __extension__ \
12143 ({ \
12144 float64x2_t b_ = (b); \
12145 float64_t a_ = (a); \
12146 float64x2_t result; \
12147 __asm__ ("ins %0.d[%3], %x1" \
12148 : "=w"(result) \
12149 : "r"(a_), "0"(b_), "i"(c) \
12150 : /* No clobbers */); \
12151 result; \
12154 #define vsetq_lane_p8(a, b, c) \
12155 __extension__ \
12156 ({ \
12157 poly8x16_t b_ = (b); \
12158 poly8_t a_ = (a); \
12159 poly8x16_t result; \
12160 __asm__ ("ins %0.b[%3], %w1" \
12161 : "=w"(result) \
12162 : "r"(a_), "0"(b_), "i"(c) \
12163 : /* No clobbers */); \
12164 result; \
12167 #define vsetq_lane_p16(a, b, c) \
12168 __extension__ \
12169 ({ \
12170 poly16x8_t b_ = (b); \
12171 poly16_t a_ = (a); \
12172 poly16x8_t result; \
12173 __asm__ ("ins %0.h[%3], %w1" \
12174 : "=w"(result) \
12175 : "r"(a_), "0"(b_), "i"(c) \
12176 : /* No clobbers */); \
12177 result; \
12180 #define vsetq_lane_s8(a, b, c) \
12181 __extension__ \
12182 ({ \
12183 int8x16_t b_ = (b); \
12184 int8_t a_ = (a); \
12185 int8x16_t result; \
12186 __asm__ ("ins %0.b[%3], %w1" \
12187 : "=w"(result) \
12188 : "r"(a_), "0"(b_), "i"(c) \
12189 : /* No clobbers */); \
12190 result; \
12193 #define vsetq_lane_s16(a, b, c) \
12194 __extension__ \
12195 ({ \
12196 int16x8_t b_ = (b); \
12197 int16_t a_ = (a); \
12198 int16x8_t result; \
12199 __asm__ ("ins %0.h[%3], %w1" \
12200 : "=w"(result) \
12201 : "r"(a_), "0"(b_), "i"(c) \
12202 : /* No clobbers */); \
12203 result; \
12206 #define vsetq_lane_s32(a, b, c) \
12207 __extension__ \
12208 ({ \
12209 int32x4_t b_ = (b); \
12210 int32_t a_ = (a); \
12211 int32x4_t result; \
12212 __asm__ ("ins %0.s[%3], %w1" \
12213 : "=w"(result) \
12214 : "r"(a_), "0"(b_), "i"(c) \
12215 : /* No clobbers */); \
12216 result; \
12219 #define vsetq_lane_s64(a, b, c) \
12220 __extension__ \
12221 ({ \
12222 int64x2_t b_ = (b); \
12223 int64_t a_ = (a); \
12224 int64x2_t result; \
12225 __asm__ ("ins %0.d[%3], %x1" \
12226 : "=w"(result) \
12227 : "r"(a_), "0"(b_), "i"(c) \
12228 : /* No clobbers */); \
12229 result; \
12232 #define vsetq_lane_u8(a, b, c) \
12233 __extension__ \
12234 ({ \
12235 uint8x16_t b_ = (b); \
12236 uint8_t a_ = (a); \
12237 uint8x16_t result; \
12238 __asm__ ("ins %0.b[%3], %w1" \
12239 : "=w"(result) \
12240 : "r"(a_), "0"(b_), "i"(c) \
12241 : /* No clobbers */); \
12242 result; \
12245 #define vsetq_lane_u16(a, b, c) \
12246 __extension__ \
12247 ({ \
12248 uint16x8_t b_ = (b); \
12249 uint16_t a_ = (a); \
12250 uint16x8_t result; \
12251 __asm__ ("ins %0.h[%3], %w1" \
12252 : "=w"(result) \
12253 : "r"(a_), "0"(b_), "i"(c) \
12254 : /* No clobbers */); \
12255 result; \
12258 #define vsetq_lane_u32(a, b, c) \
12259 __extension__ \
12260 ({ \
12261 uint32x4_t b_ = (b); \
12262 uint32_t a_ = (a); \
12263 uint32x4_t result; \
12264 __asm__ ("ins %0.s[%3], %w1" \
12265 : "=w"(result) \
12266 : "r"(a_), "0"(b_), "i"(c) \
12267 : /* No clobbers */); \
12268 result; \
12271 #define vsetq_lane_u64(a, b, c) \
12272 __extension__ \
12273 ({ \
12274 uint64x2_t b_ = (b); \
12275 uint64_t a_ = (a); \
12276 uint64x2_t result; \
12277 __asm__ ("ins %0.d[%3], %x1" \
12278 : "=w"(result) \
12279 : "r"(a_), "0"(b_), "i"(c) \
12280 : /* No clobbers */); \
12281 result; \
12284 #define vshrn_high_n_s16(a, b, c) \
12285 __extension__ \
12286 ({ \
12287 int16x8_t b_ = (b); \
12288 int8x8_t a_ = (a); \
12289 int8x16_t result = vcombine_s8 \
12290 (a_, vcreate_s8 \
12291 (__AARCH64_UINT64_C (0x0))); \
12292 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
12293 : "+w"(result) \
12294 : "w"(b_), "i"(c) \
12295 : /* No clobbers */); \
12296 result; \
12299 #define vshrn_high_n_s32(a, b, c) \
12300 __extension__ \
12301 ({ \
12302 int32x4_t b_ = (b); \
12303 int16x4_t a_ = (a); \
12304 int16x8_t result = vcombine_s16 \
12305 (a_, vcreate_s16 \
12306 (__AARCH64_UINT64_C (0x0))); \
12307 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
12308 : "+w"(result) \
12309 : "w"(b_), "i"(c) \
12310 : /* No clobbers */); \
12311 result; \
12314 #define vshrn_high_n_s64(a, b, c) \
12315 __extension__ \
12316 ({ \
12317 int64x2_t b_ = (b); \
12318 int32x2_t a_ = (a); \
12319 int32x4_t result = vcombine_s32 \
12320 (a_, vcreate_s32 \
12321 (__AARCH64_UINT64_C (0x0))); \
12322 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
12323 : "+w"(result) \
12324 : "w"(b_), "i"(c) \
12325 : /* No clobbers */); \
12326 result; \
12329 #define vshrn_high_n_u16(a, b, c) \
12330 __extension__ \
12331 ({ \
12332 uint16x8_t b_ = (b); \
12333 uint8x8_t a_ = (a); \
12334 uint8x16_t result = vcombine_u8 \
12335 (a_, vcreate_u8 \
12336 (__AARCH64_UINT64_C (0x0))); \
12337 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
12338 : "+w"(result) \
12339 : "w"(b_), "i"(c) \
12340 : /* No clobbers */); \
12341 result; \
12344 #define vshrn_high_n_u32(a, b, c) \
12345 __extension__ \
12346 ({ \
12347 uint32x4_t b_ = (b); \
12348 uint16x4_t a_ = (a); \
12349 uint16x8_t result = vcombine_u16 \
12350 (a_, vcreate_u16 \
12351 (__AARCH64_UINT64_C (0x0))); \
12352 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
12353 : "+w"(result) \
12354 : "w"(b_), "i"(c) \
12355 : /* No clobbers */); \
12356 result; \
12359 #define vshrn_high_n_u64(a, b, c) \
12360 __extension__ \
12361 ({ \
12362 uint64x2_t b_ = (b); \
12363 uint32x2_t a_ = (a); \
12364 uint32x4_t result = vcombine_u32 \
12365 (a_, vcreate_u32 \
12366 (__AARCH64_UINT64_C (0x0))); \
12367 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
12368 : "+w"(result) \
12369 : "w"(b_), "i"(c) \
12370 : /* No clobbers */); \
12371 result; \
12374 #define vshrn_n_s16(a, b) \
12375 __extension__ \
12376 ({ \
12377 int16x8_t a_ = (a); \
12378 int8x8_t result; \
12379 __asm__ ("shrn %0.8b,%1.8h,%2" \
12380 : "=w"(result) \
12381 : "w"(a_), "i"(b) \
12382 : /* No clobbers */); \
12383 result; \
12386 #define vshrn_n_s32(a, b) \
12387 __extension__ \
12388 ({ \
12389 int32x4_t a_ = (a); \
12390 int16x4_t result; \
12391 __asm__ ("shrn %0.4h,%1.4s,%2" \
12392 : "=w"(result) \
12393 : "w"(a_), "i"(b) \
12394 : /* No clobbers */); \
12395 result; \
12398 #define vshrn_n_s64(a, b) \
12399 __extension__ \
12400 ({ \
12401 int64x2_t a_ = (a); \
12402 int32x2_t result; \
12403 __asm__ ("shrn %0.2s,%1.2d,%2" \
12404 : "=w"(result) \
12405 : "w"(a_), "i"(b) \
12406 : /* No clobbers */); \
12407 result; \
12410 #define vshrn_n_u16(a, b) \
12411 __extension__ \
12412 ({ \
12413 uint16x8_t a_ = (a); \
12414 uint8x8_t result; \
12415 __asm__ ("shrn %0.8b,%1.8h,%2" \
12416 : "=w"(result) \
12417 : "w"(a_), "i"(b) \
12418 : /* No clobbers */); \
12419 result; \
12422 #define vshrn_n_u32(a, b) \
12423 __extension__ \
12424 ({ \
12425 uint32x4_t a_ = (a); \
12426 uint16x4_t result; \
12427 __asm__ ("shrn %0.4h,%1.4s,%2" \
12428 : "=w"(result) \
12429 : "w"(a_), "i"(b) \
12430 : /* No clobbers */); \
12431 result; \
12434 #define vshrn_n_u64(a, b) \
12435 __extension__ \
12436 ({ \
12437 uint64x2_t a_ = (a); \
12438 uint32x2_t result; \
12439 __asm__ ("shrn %0.2s,%1.2d,%2" \
12440 : "=w"(result) \
12441 : "w"(a_), "i"(b) \
12442 : /* No clobbers */); \
12443 result; \
12446 #define vsli_n_p8(a, b, c) \
12447 __extension__ \
12448 ({ \
12449 poly8x8_t b_ = (b); \
12450 poly8x8_t a_ = (a); \
12451 poly8x8_t result; \
12452 __asm__ ("sli %0.8b,%2.8b,%3" \
12453 : "=w"(result) \
12454 : "0"(a_), "w"(b_), "i"(c) \
12455 : /* No clobbers */); \
12456 result; \
12459 #define vsli_n_p16(a, b, c) \
12460 __extension__ \
12461 ({ \
12462 poly16x4_t b_ = (b); \
12463 poly16x4_t a_ = (a); \
12464 poly16x4_t result; \
12465 __asm__ ("sli %0.4h,%2.4h,%3" \
12466 : "=w"(result) \
12467 : "0"(a_), "w"(b_), "i"(c) \
12468 : /* No clobbers */); \
12469 result; \
12472 #define vsliq_n_p8(a, b, c) \
12473 __extension__ \
12474 ({ \
12475 poly8x16_t b_ = (b); \
12476 poly8x16_t a_ = (a); \
12477 poly8x16_t result; \
12478 __asm__ ("sli %0.16b,%2.16b,%3" \
12479 : "=w"(result) \
12480 : "0"(a_), "w"(b_), "i"(c) \
12481 : /* No clobbers */); \
12482 result; \
12485 #define vsliq_n_p16(a, b, c) \
12486 __extension__ \
12487 ({ \
12488 poly16x8_t b_ = (b); \
12489 poly16x8_t a_ = (a); \
12490 poly16x8_t result; \
12491 __asm__ ("sli %0.8h,%2.8h,%3" \
12492 : "=w"(result) \
12493 : "0"(a_), "w"(b_), "i"(c) \
12494 : /* No clobbers */); \
12495 result; \
12498 #define vsri_n_p8(a, b, c) \
12499 __extension__ \
12500 ({ \
12501 poly8x8_t b_ = (b); \
12502 poly8x8_t a_ = (a); \
12503 poly8x8_t result; \
12504 __asm__ ("sri %0.8b,%2.8b,%3" \
12505 : "=w"(result) \
12506 : "0"(a_), "w"(b_), "i"(c) \
12507 : /* No clobbers */); \
12508 result; \
12511 #define vsri_n_p16(a, b, c) \
12512 __extension__ \
12513 ({ \
12514 poly16x4_t b_ = (b); \
12515 poly16x4_t a_ = (a); \
12516 poly16x4_t result; \
12517 __asm__ ("sri %0.4h,%2.4h,%3" \
12518 : "=w"(result) \
12519 : "0"(a_), "w"(b_), "i"(c) \
12520 : /* No clobbers */); \
12521 result; \
12524 #define vsriq_n_p8(a, b, c) \
12525 __extension__ \
12526 ({ \
12527 poly8x16_t b_ = (b); \
12528 poly8x16_t a_ = (a); \
12529 poly8x16_t result; \
12530 __asm__ ("sri %0.16b,%2.16b,%3" \
12531 : "=w"(result) \
12532 : "0"(a_), "w"(b_), "i"(c) \
12533 : /* No clobbers */); \
12534 result; \
12537 #define vsriq_n_p16(a, b, c) \
12538 __extension__ \
12539 ({ \
12540 poly16x8_t b_ = (b); \
12541 poly16x8_t a_ = (a); \
12542 poly16x8_t result; \
12543 __asm__ ("sri %0.8h,%2.8h,%3" \
12544 : "=w"(result) \
12545 : "0"(a_), "w"(b_), "i"(c) \
12546 : /* No clobbers */); \
12547 result; \
12550 #define vst1_lane_f32(a, b, c) \
12551 __extension__ \
12552 ({ \
12553 float32x2_t b_ = (b); \
12554 float32_t * a_ = (a); \
12555 __asm__ ("st1 {%1.s}[%2],[%0]" \
12557 : "r"(a_), "w"(b_), "i"(c) \
12558 : "memory"); \
12561 #define vst1_lane_f64(a, b, c) \
12562 __extension__ \
12563 ({ \
12564 float64x1_t b_ = (b); \
12565 float64_t * a_ = (a); \
12566 __asm__ ("st1 {%1.d}[%2],[%0]" \
12568 : "r"(a_), "w"(b_), "i"(c) \
12569 : "memory"); \
12572 #define vst1_lane_p8(a, b, c) \
12573 __extension__ \
12574 ({ \
12575 poly8x8_t b_ = (b); \
12576 poly8_t * a_ = (a); \
12577 __asm__ ("st1 {%1.b}[%2],[%0]" \
12579 : "r"(a_), "w"(b_), "i"(c) \
12580 : "memory"); \
12583 #define vst1_lane_p16(a, b, c) \
12584 __extension__ \
12585 ({ \
12586 poly16x4_t b_ = (b); \
12587 poly16_t * a_ = (a); \
12588 __asm__ ("st1 {%1.h}[%2],[%0]" \
12590 : "r"(a_), "w"(b_), "i"(c) \
12591 : "memory"); \
12594 #define vst1_lane_s8(a, b, c) \
12595 __extension__ \
12596 ({ \
12597 int8x8_t b_ = (b); \
12598 int8_t * a_ = (a); \
12599 __asm__ ("st1 {%1.b}[%2],[%0]" \
12601 : "r"(a_), "w"(b_), "i"(c) \
12602 : "memory"); \
12605 #define vst1_lane_s16(a, b, c) \
12606 __extension__ \
12607 ({ \
12608 int16x4_t b_ = (b); \
12609 int16_t * a_ = (a); \
12610 __asm__ ("st1 {%1.h}[%2],[%0]" \
12612 : "r"(a_), "w"(b_), "i"(c) \
12613 : "memory"); \
12616 #define vst1_lane_s32(a, b, c) \
12617 __extension__ \
12618 ({ \
12619 int32x2_t b_ = (b); \
12620 int32_t * a_ = (a); \
12621 __asm__ ("st1 {%1.s}[%2],[%0]" \
12623 : "r"(a_), "w"(b_), "i"(c) \
12624 : "memory"); \
12627 #define vst1_lane_s64(a, b, c) \
12628 __extension__ \
12629 ({ \
12630 int64x1_t b_ = (b); \
12631 int64_t * a_ = (a); \
12632 __asm__ ("st1 {%1.d}[%2],[%0]" \
12634 : "r"(a_), "w"(b_), "i"(c) \
12635 : "memory"); \
12638 #define vst1_lane_u8(a, b, c) \
12639 __extension__ \
12640 ({ \
12641 uint8x8_t b_ = (b); \
12642 uint8_t * a_ = (a); \
12643 __asm__ ("st1 {%1.b}[%2],[%0]" \
12645 : "r"(a_), "w"(b_), "i"(c) \
12646 : "memory"); \
12649 #define vst1_lane_u16(a, b, c) \
12650 __extension__ \
12651 ({ \
12652 uint16x4_t b_ = (b); \
12653 uint16_t * a_ = (a); \
12654 __asm__ ("st1 {%1.h}[%2],[%0]" \
12656 : "r"(a_), "w"(b_), "i"(c) \
12657 : "memory"); \
12660 #define vst1_lane_u32(a, b, c) \
12661 __extension__ \
12662 ({ \
12663 uint32x2_t b_ = (b); \
12664 uint32_t * a_ = (a); \
12665 __asm__ ("st1 {%1.s}[%2],[%0]" \
12667 : "r"(a_), "w"(b_), "i"(c) \
12668 : "memory"); \
12671 #define vst1_lane_u64(a, b, c) \
12672 __extension__ \
12673 ({ \
12674 uint64x1_t b_ = (b); \
12675 uint64_t * a_ = (a); \
12676 __asm__ ("st1 {%1.d}[%2],[%0]" \
12678 : "r"(a_), "w"(b_), "i"(c) \
12679 : "memory"); \
12683 #define vst1q_lane_f32(a, b, c) \
12684 __extension__ \
12685 ({ \
12686 float32x4_t b_ = (b); \
12687 float32_t * a_ = (a); \
12688 __asm__ ("st1 {%1.s}[%2],[%0]" \
12690 : "r"(a_), "w"(b_), "i"(c) \
12691 : "memory"); \
12694 #define vst1q_lane_f64(a, b, c) \
12695 __extension__ \
12696 ({ \
12697 float64x2_t b_ = (b); \
12698 float64_t * a_ = (a); \
12699 __asm__ ("st1 {%1.d}[%2],[%0]" \
12701 : "r"(a_), "w"(b_), "i"(c) \
12702 : "memory"); \
12705 #define vst1q_lane_p8(a, b, c) \
12706 __extension__ \
12707 ({ \
12708 poly8x16_t b_ = (b); \
12709 poly8_t * a_ = (a); \
12710 __asm__ ("st1 {%1.b}[%2],[%0]" \
12712 : "r"(a_), "w"(b_), "i"(c) \
12713 : "memory"); \
12716 #define vst1q_lane_p16(a, b, c) \
12717 __extension__ \
12718 ({ \
12719 poly16x8_t b_ = (b); \
12720 poly16_t * a_ = (a); \
12721 __asm__ ("st1 {%1.h}[%2],[%0]" \
12723 : "r"(a_), "w"(b_), "i"(c) \
12724 : "memory"); \
12727 #define vst1q_lane_s8(a, b, c) \
12728 __extension__ \
12729 ({ \
12730 int8x16_t b_ = (b); \
12731 int8_t * a_ = (a); \
12732 __asm__ ("st1 {%1.b}[%2],[%0]" \
12734 : "r"(a_), "w"(b_), "i"(c) \
12735 : "memory"); \
12738 #define vst1q_lane_s16(a, b, c) \
12739 __extension__ \
12740 ({ \
12741 int16x8_t b_ = (b); \
12742 int16_t * a_ = (a); \
12743 __asm__ ("st1 {%1.h}[%2],[%0]" \
12745 : "r"(a_), "w"(b_), "i"(c) \
12746 : "memory"); \
12749 #define vst1q_lane_s32(a, b, c) \
12750 __extension__ \
12751 ({ \
12752 int32x4_t b_ = (b); \
12753 int32_t * a_ = (a); \
12754 __asm__ ("st1 {%1.s}[%2],[%0]" \
12756 : "r"(a_), "w"(b_), "i"(c) \
12757 : "memory"); \
12760 #define vst1q_lane_s64(a, b, c) \
12761 __extension__ \
12762 ({ \
12763 int64x2_t b_ = (b); \
12764 int64_t * a_ = (a); \
12765 __asm__ ("st1 {%1.d}[%2],[%0]" \
12767 : "r"(a_), "w"(b_), "i"(c) \
12768 : "memory"); \
12771 #define vst1q_lane_u8(a, b, c) \
12772 __extension__ \
12773 ({ \
12774 uint8x16_t b_ = (b); \
12775 uint8_t * a_ = (a); \
12776 __asm__ ("st1 {%1.b}[%2],[%0]" \
12778 : "r"(a_), "w"(b_), "i"(c) \
12779 : "memory"); \
12782 #define vst1q_lane_u16(a, b, c) \
12783 __extension__ \
12784 ({ \
12785 uint16x8_t b_ = (b); \
12786 uint16_t * a_ = (a); \
12787 __asm__ ("st1 {%1.h}[%2],[%0]" \
12789 : "r"(a_), "w"(b_), "i"(c) \
12790 : "memory"); \
12793 #define vst1q_lane_u32(a, b, c) \
12794 __extension__ \
12795 ({ \
12796 uint32x4_t b_ = (b); \
12797 uint32_t * a_ = (a); \
12798 __asm__ ("st1 {%1.s}[%2],[%0]" \
12800 : "r"(a_), "w"(b_), "i"(c) \
12801 : "memory"); \
12804 #define vst1q_lane_u64(a, b, c) \
12805 __extension__ \
12806 ({ \
12807 uint64x2_t b_ = (b); \
12808 uint64_t * a_ = (a); \
12809 __asm__ ("st1 {%1.d}[%2],[%0]" \
12811 : "r"(a_), "w"(b_), "i"(c) \
12812 : "memory"); \
12815 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12816 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
12818 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12819 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12820 : "+w"(result)
12821 : "w"(b), "w"(c)
12822 : /* No clobbers */);
12823 return result;
12826 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12827 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
12829 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
12830 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12831 : "+w"(result)
12832 : "w"(b), "w"(c)
12833 : /* No clobbers */);
12834 return result;
12837 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12838 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
12840 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
12841 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12842 : "+w"(result)
12843 : "w"(b), "w"(c)
12844 : /* No clobbers */);
12845 return result;
12848 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12849 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
12851 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12852 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12853 : "+w"(result)
12854 : "w"(b), "w"(c)
12855 : /* No clobbers */);
12856 return result;
12859 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12860 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
12862 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
12863 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12864 : "+w"(result)
12865 : "w"(b), "w"(c)
12866 : /* No clobbers */);
12867 return result;
12870 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12871 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
12873 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
12874 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12875 : "+w"(result)
12876 : "w"(b), "w"(c)
12877 : /* No clobbers */);
12878 return result;
12881 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12882 vsubhn_s16 (int16x8_t a, int16x8_t b)
12884 int8x8_t result;
12885 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12886 : "=w"(result)
12887 : "w"(a), "w"(b)
12888 : /* No clobbers */);
12889 return result;
12892 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12893 vsubhn_s32 (int32x4_t a, int32x4_t b)
12895 int16x4_t result;
12896 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12897 : "=w"(result)
12898 : "w"(a), "w"(b)
12899 : /* No clobbers */);
12900 return result;
12903 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12904 vsubhn_s64 (int64x2_t a, int64x2_t b)
12906 int32x2_t result;
12907 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12908 : "=w"(result)
12909 : "w"(a), "w"(b)
12910 : /* No clobbers */);
12911 return result;
12914 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12915 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
12917 uint8x8_t result;
12918 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12919 : "=w"(result)
12920 : "w"(a), "w"(b)
12921 : /* No clobbers */);
12922 return result;
12925 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12926 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
12928 uint16x4_t result;
12929 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12930 : "=w"(result)
12931 : "w"(a), "w"(b)
12932 : /* No clobbers */);
12933 return result;
12936 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12937 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
12939 uint32x2_t result;
12940 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12941 : "=w"(result)
12942 : "w"(a), "w"(b)
12943 : /* No clobbers */);
12944 return result;
12947 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12948 vtrn1_f32 (float32x2_t a, float32x2_t b)
12950 float32x2_t result;
12951 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12952 : "=w"(result)
12953 : "w"(a), "w"(b)
12954 : /* No clobbers */);
12955 return result;
12958 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12959 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
12961 poly8x8_t result;
12962 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12963 : "=w"(result)
12964 : "w"(a), "w"(b)
12965 : /* No clobbers */);
12966 return result;
12969 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12970 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
12972 poly16x4_t result;
12973 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12974 : "=w"(result)
12975 : "w"(a), "w"(b)
12976 : /* No clobbers */);
12977 return result;
12980 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12981 vtrn1_s8 (int8x8_t a, int8x8_t b)
12983 int8x8_t result;
12984 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12985 : "=w"(result)
12986 : "w"(a), "w"(b)
12987 : /* No clobbers */);
12988 return result;
12991 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12992 vtrn1_s16 (int16x4_t a, int16x4_t b)
12994 int16x4_t result;
12995 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12996 : "=w"(result)
12997 : "w"(a), "w"(b)
12998 : /* No clobbers */);
12999 return result;
13002 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13003 vtrn1_s32 (int32x2_t a, int32x2_t b)
13005 int32x2_t result;
13006 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
13007 : "=w"(result)
13008 : "w"(a), "w"(b)
13009 : /* No clobbers */);
13010 return result;
13013 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13014 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
13016 uint8x8_t result;
13017 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
13018 : "=w"(result)
13019 : "w"(a), "w"(b)
13020 : /* No clobbers */);
13021 return result;
13024 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13025 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
13027 uint16x4_t result;
13028 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
13029 : "=w"(result)
13030 : "w"(a), "w"(b)
13031 : /* No clobbers */);
13032 return result;
13035 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13036 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
13038 uint32x2_t result;
13039 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
13040 : "=w"(result)
13041 : "w"(a), "w"(b)
13042 : /* No clobbers */);
13043 return result;
13046 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13047 vtrn1q_f32 (float32x4_t a, float32x4_t b)
13049 float32x4_t result;
13050 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
13051 : "=w"(result)
13052 : "w"(a), "w"(b)
13053 : /* No clobbers */);
13054 return result;
13057 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13058 vtrn1q_f64 (float64x2_t a, float64x2_t b)
13060 float64x2_t result;
13061 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
13062 : "=w"(result)
13063 : "w"(a), "w"(b)
13064 : /* No clobbers */);
13065 return result;
13068 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13069 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
13071 poly8x16_t result;
13072 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
13073 : "=w"(result)
13074 : "w"(a), "w"(b)
13075 : /* No clobbers */);
13076 return result;
13079 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13080 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
13082 poly16x8_t result;
13083 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
13084 : "=w"(result)
13085 : "w"(a), "w"(b)
13086 : /* No clobbers */);
13087 return result;
13090 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13091 vtrn1q_s8 (int8x16_t a, int8x16_t b)
13093 int8x16_t result;
13094 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
13095 : "=w"(result)
13096 : "w"(a), "w"(b)
13097 : /* No clobbers */);
13098 return result;
13101 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13102 vtrn1q_s16 (int16x8_t a, int16x8_t b)
13104 int16x8_t result;
13105 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
13106 : "=w"(result)
13107 : "w"(a), "w"(b)
13108 : /* No clobbers */);
13109 return result;
13112 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13113 vtrn1q_s32 (int32x4_t a, int32x4_t b)
13115 int32x4_t result;
13116 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
13117 : "=w"(result)
13118 : "w"(a), "w"(b)
13119 : /* No clobbers */);
13120 return result;
13123 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13124 vtrn1q_s64 (int64x2_t a, int64x2_t b)
13126 int64x2_t result;
13127 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
13128 : "=w"(result)
13129 : "w"(a), "w"(b)
13130 : /* No clobbers */);
13131 return result;
13134 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13135 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
13137 uint8x16_t result;
13138 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
13139 : "=w"(result)
13140 : "w"(a), "w"(b)
13141 : /* No clobbers */);
13142 return result;
13145 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13146 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
13148 uint16x8_t result;
13149 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
13150 : "=w"(result)
13151 : "w"(a), "w"(b)
13152 : /* No clobbers */);
13153 return result;
13156 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13157 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
13159 uint32x4_t result;
13160 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
13161 : "=w"(result)
13162 : "w"(a), "w"(b)
13163 : /* No clobbers */);
13164 return result;
13167 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13168 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
13170 uint64x2_t result;
13171 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
13172 : "=w"(result)
13173 : "w"(a), "w"(b)
13174 : /* No clobbers */);
13175 return result;
13178 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13179 vtrn2_f32 (float32x2_t a, float32x2_t b)
13181 float32x2_t result;
13182 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
13183 : "=w"(result)
13184 : "w"(a), "w"(b)
13185 : /* No clobbers */);
13186 return result;
13189 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13190 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
13192 poly8x8_t result;
13193 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
13194 : "=w"(result)
13195 : "w"(a), "w"(b)
13196 : /* No clobbers */);
13197 return result;
13200 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13201 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
13203 poly16x4_t result;
13204 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
13205 : "=w"(result)
13206 : "w"(a), "w"(b)
13207 : /* No clobbers */);
13208 return result;
13211 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13212 vtrn2_s8 (int8x8_t a, int8x8_t b)
13214 int8x8_t result;
13215 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
13216 : "=w"(result)
13217 : "w"(a), "w"(b)
13218 : /* No clobbers */);
13219 return result;
13222 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13223 vtrn2_s16 (int16x4_t a, int16x4_t b)
13225 int16x4_t result;
13226 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
13227 : "=w"(result)
13228 : "w"(a), "w"(b)
13229 : /* No clobbers */);
13230 return result;
13233 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13234 vtrn2_s32 (int32x2_t a, int32x2_t b)
13236 int32x2_t result;
13237 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
13238 : "=w"(result)
13239 : "w"(a), "w"(b)
13240 : /* No clobbers */);
13241 return result;
13244 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13245 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
13247 uint8x8_t result;
13248 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
13249 : "=w"(result)
13250 : "w"(a), "w"(b)
13251 : /* No clobbers */);
13252 return result;
13255 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13256 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
13258 uint16x4_t result;
13259 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
13260 : "=w"(result)
13261 : "w"(a), "w"(b)
13262 : /* No clobbers */);
13263 return result;
13266 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13267 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
13269 uint32x2_t result;
13270 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
13271 : "=w"(result)
13272 : "w"(a), "w"(b)
13273 : /* No clobbers */);
13274 return result;
13277 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13278 vtrn2q_f32 (float32x4_t a, float32x4_t b)
13280 float32x4_t result;
13281 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
13282 : "=w"(result)
13283 : "w"(a), "w"(b)
13284 : /* No clobbers */);
13285 return result;
13288 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13289 vtrn2q_f64 (float64x2_t a, float64x2_t b)
13291 float64x2_t result;
13292 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
13293 : "=w"(result)
13294 : "w"(a), "w"(b)
13295 : /* No clobbers */);
13296 return result;
13299 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13300 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
13302 poly8x16_t result;
13303 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
13304 : "=w"(result)
13305 : "w"(a), "w"(b)
13306 : /* No clobbers */);
13307 return result;
13310 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13311 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
13313 poly16x8_t result;
13314 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
13315 : "=w"(result)
13316 : "w"(a), "w"(b)
13317 : /* No clobbers */);
13318 return result;
13321 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13322 vtrn2q_s8 (int8x16_t a, int8x16_t b)
13324 int8x16_t result;
13325 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
13326 : "=w"(result)
13327 : "w"(a), "w"(b)
13328 : /* No clobbers */);
13329 return result;
13332 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13333 vtrn2q_s16 (int16x8_t a, int16x8_t b)
13335 int16x8_t result;
13336 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
13337 : "=w"(result)
13338 : "w"(a), "w"(b)
13339 : /* No clobbers */);
13340 return result;
13343 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13344 vtrn2q_s32 (int32x4_t a, int32x4_t b)
13346 int32x4_t result;
13347 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
13348 : "=w"(result)
13349 : "w"(a), "w"(b)
13350 : /* No clobbers */);
13351 return result;
13354 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13355 vtrn2q_s64 (int64x2_t a, int64x2_t b)
13357 int64x2_t result;
13358 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
13359 : "=w"(result)
13360 : "w"(a), "w"(b)
13361 : /* No clobbers */);
13362 return result;
13365 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13366 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
13368 uint8x16_t result;
13369 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
13370 : "=w"(result)
13371 : "w"(a), "w"(b)
13372 : /* No clobbers */);
13373 return result;
13376 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13377 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
13379 uint16x8_t result;
13380 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
13381 : "=w"(result)
13382 : "w"(a), "w"(b)
13383 : /* No clobbers */);
13384 return result;
13387 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13388 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
13390 uint32x4_t result;
13391 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
13392 : "=w"(result)
13393 : "w"(a), "w"(b)
13394 : /* No clobbers */);
13395 return result;
13398 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13399 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
13401 uint64x2_t result;
13402 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
13403 : "=w"(result)
13404 : "w"(a), "w"(b)
13405 : /* No clobbers */);
13406 return result;
13409 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13410 vtst_p8 (poly8x8_t a, poly8x8_t b)
13412 uint8x8_t result;
13413 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
13414 : "=w"(result)
13415 : "w"(a), "w"(b)
13416 : /* No clobbers */);
13417 return result;
13420 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13421 vtst_p16 (poly16x4_t a, poly16x4_t b)
13423 uint16x4_t result;
13424 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
13425 : "=w"(result)
13426 : "w"(a), "w"(b)
13427 : /* No clobbers */);
13428 return result;
13431 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13432 vtstq_p8 (poly8x16_t a, poly8x16_t b)
13434 uint8x16_t result;
13435 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
13436 : "=w"(result)
13437 : "w"(a), "w"(b)
13438 : /* No clobbers */);
13439 return result;
13442 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13443 vtstq_p16 (poly16x8_t a, poly16x8_t b)
13445 uint16x8_t result;
13446 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
13447 : "=w"(result)
13448 : "w"(a), "w"(b)
13449 : /* No clobbers */);
13450 return result;
13452 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13453 vuzp1_f32 (float32x2_t a, float32x2_t b)
13455 float32x2_t result;
13456 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13457 : "=w"(result)
13458 : "w"(a), "w"(b)
13459 : /* No clobbers */);
13460 return result;
13463 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13464 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
13466 poly8x8_t result;
13467 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13468 : "=w"(result)
13469 : "w"(a), "w"(b)
13470 : /* No clobbers */);
13471 return result;
13474 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13475 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
13477 poly16x4_t result;
13478 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13479 : "=w"(result)
13480 : "w"(a), "w"(b)
13481 : /* No clobbers */);
13482 return result;
13485 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13486 vuzp1_s8 (int8x8_t a, int8x8_t b)
13488 int8x8_t result;
13489 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13490 : "=w"(result)
13491 : "w"(a), "w"(b)
13492 : /* No clobbers */);
13493 return result;
13496 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13497 vuzp1_s16 (int16x4_t a, int16x4_t b)
13499 int16x4_t result;
13500 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13501 : "=w"(result)
13502 : "w"(a), "w"(b)
13503 : /* No clobbers */);
13504 return result;
13507 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13508 vuzp1_s32 (int32x2_t a, int32x2_t b)
13510 int32x2_t result;
13511 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13512 : "=w"(result)
13513 : "w"(a), "w"(b)
13514 : /* No clobbers */);
13515 return result;
13518 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13519 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
13521 uint8x8_t result;
13522 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13523 : "=w"(result)
13524 : "w"(a), "w"(b)
13525 : /* No clobbers */);
13526 return result;
13529 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13530 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
13532 uint16x4_t result;
13533 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13534 : "=w"(result)
13535 : "w"(a), "w"(b)
13536 : /* No clobbers */);
13537 return result;
13540 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13541 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
13543 uint32x2_t result;
13544 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13545 : "=w"(result)
13546 : "w"(a), "w"(b)
13547 : /* No clobbers */);
13548 return result;
13551 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13552 vuzp1q_f32 (float32x4_t a, float32x4_t b)
13554 float32x4_t result;
13555 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13556 : "=w"(result)
13557 : "w"(a), "w"(b)
13558 : /* No clobbers */);
13559 return result;
13562 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13563 vuzp1q_f64 (float64x2_t a, float64x2_t b)
13565 float64x2_t result;
13566 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13567 : "=w"(result)
13568 : "w"(a), "w"(b)
13569 : /* No clobbers */);
13570 return result;
13573 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13574 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
13576 poly8x16_t result;
13577 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13578 : "=w"(result)
13579 : "w"(a), "w"(b)
13580 : /* No clobbers */);
13581 return result;
13584 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13585 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
13587 poly16x8_t result;
13588 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13589 : "=w"(result)
13590 : "w"(a), "w"(b)
13591 : /* No clobbers */);
13592 return result;
13595 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13596 vuzp1q_s8 (int8x16_t a, int8x16_t b)
13598 int8x16_t result;
13599 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13600 : "=w"(result)
13601 : "w"(a), "w"(b)
13602 : /* No clobbers */);
13603 return result;
13606 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13607 vuzp1q_s16 (int16x8_t a, int16x8_t b)
13609 int16x8_t result;
13610 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13611 : "=w"(result)
13612 : "w"(a), "w"(b)
13613 : /* No clobbers */);
13614 return result;
13617 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13618 vuzp1q_s32 (int32x4_t a, int32x4_t b)
13620 int32x4_t result;
13621 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13622 : "=w"(result)
13623 : "w"(a), "w"(b)
13624 : /* No clobbers */);
13625 return result;
13628 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13629 vuzp1q_s64 (int64x2_t a, int64x2_t b)
13631 int64x2_t result;
13632 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13633 : "=w"(result)
13634 : "w"(a), "w"(b)
13635 : /* No clobbers */);
13636 return result;
13639 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13640 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
13642 uint8x16_t result;
13643 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13644 : "=w"(result)
13645 : "w"(a), "w"(b)
13646 : /* No clobbers */);
13647 return result;
13650 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13651 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
13653 uint16x8_t result;
13654 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13655 : "=w"(result)
13656 : "w"(a), "w"(b)
13657 : /* No clobbers */);
13658 return result;
13661 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13662 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
13664 uint32x4_t result;
13665 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13666 : "=w"(result)
13667 : "w"(a), "w"(b)
13668 : /* No clobbers */);
13669 return result;
13672 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13673 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
13675 uint64x2_t result;
13676 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13677 : "=w"(result)
13678 : "w"(a), "w"(b)
13679 : /* No clobbers */);
13680 return result;
13683 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13684 vuzp2_f32 (float32x2_t a, float32x2_t b)
13686 float32x2_t result;
13687 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13688 : "=w"(result)
13689 : "w"(a), "w"(b)
13690 : /* No clobbers */);
13691 return result;
13694 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13695 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
13697 poly8x8_t result;
13698 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13699 : "=w"(result)
13700 : "w"(a), "w"(b)
13701 : /* No clobbers */);
13702 return result;
13705 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13706 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
13708 poly16x4_t result;
13709 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13710 : "=w"(result)
13711 : "w"(a), "w"(b)
13712 : /* No clobbers */);
13713 return result;
13716 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13717 vuzp2_s8 (int8x8_t a, int8x8_t b)
13719 int8x8_t result;
13720 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13721 : "=w"(result)
13722 : "w"(a), "w"(b)
13723 : /* No clobbers */);
13724 return result;
13727 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13728 vuzp2_s16 (int16x4_t a, int16x4_t b)
13730 int16x4_t result;
13731 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13732 : "=w"(result)
13733 : "w"(a), "w"(b)
13734 : /* No clobbers */);
13735 return result;
13738 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13739 vuzp2_s32 (int32x2_t a, int32x2_t b)
13741 int32x2_t result;
13742 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13743 : "=w"(result)
13744 : "w"(a), "w"(b)
13745 : /* No clobbers */);
13746 return result;
13749 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13750 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
13752 uint8x8_t result;
13753 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13754 : "=w"(result)
13755 : "w"(a), "w"(b)
13756 : /* No clobbers */);
13757 return result;
13760 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13761 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
13763 uint16x4_t result;
13764 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13765 : "=w"(result)
13766 : "w"(a), "w"(b)
13767 : /* No clobbers */);
13768 return result;
13771 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13772 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
13774 uint32x2_t result;
13775 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13776 : "=w"(result)
13777 : "w"(a), "w"(b)
13778 : /* No clobbers */);
13779 return result;
13782 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13783 vuzp2q_f32 (float32x4_t a, float32x4_t b)
13785 float32x4_t result;
13786 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13787 : "=w"(result)
13788 : "w"(a), "w"(b)
13789 : /* No clobbers */);
13790 return result;
13793 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13794 vuzp2q_f64 (float64x2_t a, float64x2_t b)
13796 float64x2_t result;
13797 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13798 : "=w"(result)
13799 : "w"(a), "w"(b)
13800 : /* No clobbers */);
13801 return result;
13804 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13805 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
13807 poly8x16_t result;
13808 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13809 : "=w"(result)
13810 : "w"(a), "w"(b)
13811 : /* No clobbers */);
13812 return result;
13815 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13816 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
13818 poly16x8_t result;
13819 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13820 : "=w"(result)
13821 : "w"(a), "w"(b)
13822 : /* No clobbers */);
13823 return result;
13826 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13827 vuzp2q_s8 (int8x16_t a, int8x16_t b)
13829 int8x16_t result;
13830 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13831 : "=w"(result)
13832 : "w"(a), "w"(b)
13833 : /* No clobbers */);
13834 return result;
13837 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13838 vuzp2q_s16 (int16x8_t a, int16x8_t b)
13840 int16x8_t result;
13841 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13842 : "=w"(result)
13843 : "w"(a), "w"(b)
13844 : /* No clobbers */);
13845 return result;
13848 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13849 vuzp2q_s32 (int32x4_t a, int32x4_t b)
13851 int32x4_t result;
13852 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13853 : "=w"(result)
13854 : "w"(a), "w"(b)
13855 : /* No clobbers */);
13856 return result;
13859 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13860 vuzp2q_s64 (int64x2_t a, int64x2_t b)
13862 int64x2_t result;
13863 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13864 : "=w"(result)
13865 : "w"(a), "w"(b)
13866 : /* No clobbers */);
13867 return result;
13870 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13871 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
13873 uint8x16_t result;
13874 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13875 : "=w"(result)
13876 : "w"(a), "w"(b)
13877 : /* No clobbers */);
13878 return result;
13881 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13882 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
13884 uint16x8_t result;
13885 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13886 : "=w"(result)
13887 : "w"(a), "w"(b)
13888 : /* No clobbers */);
13889 return result;
13892 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13893 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
13895 uint32x4_t result;
13896 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13897 : "=w"(result)
13898 : "w"(a), "w"(b)
13899 : /* No clobbers */);
13900 return result;
13903 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13904 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
13906 uint64x2_t result;
13907 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13908 : "=w"(result)
13909 : "w"(a), "w"(b)
13910 : /* No clobbers */);
13911 return result;
13914 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13915 vzip1_f32 (float32x2_t a, float32x2_t b)
13917 float32x2_t result;
13918 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13919 : "=w"(result)
13920 : "w"(a), "w"(b)
13921 : /* No clobbers */);
13922 return result;
13925 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13926 vzip1_p8 (poly8x8_t a, poly8x8_t b)
13928 poly8x8_t result;
13929 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13930 : "=w"(result)
13931 : "w"(a), "w"(b)
13932 : /* No clobbers */);
13933 return result;
13936 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13937 vzip1_p16 (poly16x4_t a, poly16x4_t b)
13939 poly16x4_t result;
13940 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13941 : "=w"(result)
13942 : "w"(a), "w"(b)
13943 : /* No clobbers */);
13944 return result;
13947 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13948 vzip1_s8 (int8x8_t a, int8x8_t b)
13950 int8x8_t result;
13951 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13952 : "=w"(result)
13953 : "w"(a), "w"(b)
13954 : /* No clobbers */);
13955 return result;
13958 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13959 vzip1_s16 (int16x4_t a, int16x4_t b)
13961 int16x4_t result;
13962 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13963 : "=w"(result)
13964 : "w"(a), "w"(b)
13965 : /* No clobbers */);
13966 return result;
13969 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13970 vzip1_s32 (int32x2_t a, int32x2_t b)
13972 int32x2_t result;
13973 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13974 : "=w"(result)
13975 : "w"(a), "w"(b)
13976 : /* No clobbers */);
13977 return result;
13980 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13981 vzip1_u8 (uint8x8_t a, uint8x8_t b)
13983 uint8x8_t result;
13984 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13985 : "=w"(result)
13986 : "w"(a), "w"(b)
13987 : /* No clobbers */);
13988 return result;
13991 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13992 vzip1_u16 (uint16x4_t a, uint16x4_t b)
13994 uint16x4_t result;
13995 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13996 : "=w"(result)
13997 : "w"(a), "w"(b)
13998 : /* No clobbers */);
13999 return result;
14002 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14003 vzip1_u32 (uint32x2_t a, uint32x2_t b)
14005 uint32x2_t result;
14006 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
14007 : "=w"(result)
14008 : "w"(a), "w"(b)
14009 : /* No clobbers */);
14010 return result;
14013 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14014 vzip1q_f32 (float32x4_t a, float32x4_t b)
14016 float32x4_t result;
14017 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
14018 : "=w"(result)
14019 : "w"(a), "w"(b)
14020 : /* No clobbers */);
14021 return result;
14024 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14025 vzip1q_f64 (float64x2_t a, float64x2_t b)
14027 float64x2_t result;
14028 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
14029 : "=w"(result)
14030 : "w"(a), "w"(b)
14031 : /* No clobbers */);
14032 return result;
14035 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14036 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
14038 poly8x16_t result;
14039 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
14040 : "=w"(result)
14041 : "w"(a), "w"(b)
14042 : /* No clobbers */);
14043 return result;
14046 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14047 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
14049 poly16x8_t result;
14050 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
14051 : "=w"(result)
14052 : "w"(a), "w"(b)
14053 : /* No clobbers */);
14054 return result;
14057 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14058 vzip1q_s8 (int8x16_t a, int8x16_t b)
14060 int8x16_t result;
14061 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
14062 : "=w"(result)
14063 : "w"(a), "w"(b)
14064 : /* No clobbers */);
14065 return result;
14068 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14069 vzip1q_s16 (int16x8_t a, int16x8_t b)
14071 int16x8_t result;
14072 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
14073 : "=w"(result)
14074 : "w"(a), "w"(b)
14075 : /* No clobbers */);
14076 return result;
14079 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14080 vzip1q_s32 (int32x4_t a, int32x4_t b)
14082 int32x4_t result;
14083 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
14084 : "=w"(result)
14085 : "w"(a), "w"(b)
14086 : /* No clobbers */);
14087 return result;
14090 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14091 vzip1q_s64 (int64x2_t a, int64x2_t b)
14093 int64x2_t result;
14094 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
14095 : "=w"(result)
14096 : "w"(a), "w"(b)
14097 : /* No clobbers */);
14098 return result;
14101 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14102 vzip1q_u8 (uint8x16_t a, uint8x16_t b)
14104 uint8x16_t result;
14105 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
14106 : "=w"(result)
14107 : "w"(a), "w"(b)
14108 : /* No clobbers */);
14109 return result;
14112 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14113 vzip1q_u16 (uint16x8_t a, uint16x8_t b)
14115 uint16x8_t result;
14116 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
14117 : "=w"(result)
14118 : "w"(a), "w"(b)
14119 : /* No clobbers */);
14120 return result;
14123 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14124 vzip1q_u32 (uint32x4_t a, uint32x4_t b)
14126 uint32x4_t result;
14127 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
14128 : "=w"(result)
14129 : "w"(a), "w"(b)
14130 : /* No clobbers */);
14131 return result;
14134 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14135 vzip1q_u64 (uint64x2_t a, uint64x2_t b)
14137 uint64x2_t result;
14138 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
14139 : "=w"(result)
14140 : "w"(a), "w"(b)
14141 : /* No clobbers */);
14142 return result;
14145 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14146 vzip2_f32 (float32x2_t a, float32x2_t b)
14148 float32x2_t result;
14149 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
14150 : "=w"(result)
14151 : "w"(a), "w"(b)
14152 : /* No clobbers */);
14153 return result;
14156 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14157 vzip2_p8 (poly8x8_t a, poly8x8_t b)
14159 poly8x8_t result;
14160 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
14161 : "=w"(result)
14162 : "w"(a), "w"(b)
14163 : /* No clobbers */);
14164 return result;
14167 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14168 vzip2_p16 (poly16x4_t a, poly16x4_t b)
14170 poly16x4_t result;
14171 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
14172 : "=w"(result)
14173 : "w"(a), "w"(b)
14174 : /* No clobbers */);
14175 return result;
14178 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14179 vzip2_s8 (int8x8_t a, int8x8_t b)
14181 int8x8_t result;
14182 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
14183 : "=w"(result)
14184 : "w"(a), "w"(b)
14185 : /* No clobbers */);
14186 return result;
14189 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14190 vzip2_s16 (int16x4_t a, int16x4_t b)
14192 int16x4_t result;
14193 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
14194 : "=w"(result)
14195 : "w"(a), "w"(b)
14196 : /* No clobbers */);
14197 return result;
14200 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14201 vzip2_s32 (int32x2_t a, int32x2_t b)
14203 int32x2_t result;
14204 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
14205 : "=w"(result)
14206 : "w"(a), "w"(b)
14207 : /* No clobbers */);
14208 return result;
14211 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14212 vzip2_u8 (uint8x8_t a, uint8x8_t b)
14214 uint8x8_t result;
14215 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
14216 : "=w"(result)
14217 : "w"(a), "w"(b)
14218 : /* No clobbers */);
14219 return result;
14222 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14223 vzip2_u16 (uint16x4_t a, uint16x4_t b)
14225 uint16x4_t result;
14226 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
14227 : "=w"(result)
14228 : "w"(a), "w"(b)
14229 : /* No clobbers */);
14230 return result;
14233 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14234 vzip2_u32 (uint32x2_t a, uint32x2_t b)
14236 uint32x2_t result;
14237 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
14238 : "=w"(result)
14239 : "w"(a), "w"(b)
14240 : /* No clobbers */);
14241 return result;
14244 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14245 vzip2q_f32 (float32x4_t a, float32x4_t b)
14247 float32x4_t result;
14248 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
14249 : "=w"(result)
14250 : "w"(a), "w"(b)
14251 : /* No clobbers */);
14252 return result;
14255 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14256 vzip2q_f64 (float64x2_t a, float64x2_t b)
14258 float64x2_t result;
14259 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
14260 : "=w"(result)
14261 : "w"(a), "w"(b)
14262 : /* No clobbers */);
14263 return result;
14266 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14267 vzip2q_p8 (poly8x16_t a, poly8x16_t b)
14269 poly8x16_t result;
14270 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
14271 : "=w"(result)
14272 : "w"(a), "w"(b)
14273 : /* No clobbers */);
14274 return result;
14277 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14278 vzip2q_p16 (poly16x8_t a, poly16x8_t b)
14280 poly16x8_t result;
14281 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
14282 : "=w"(result)
14283 : "w"(a), "w"(b)
14284 : /* No clobbers */);
14285 return result;
14288 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14289 vzip2q_s8 (int8x16_t a, int8x16_t b)
14291 int8x16_t result;
14292 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
14293 : "=w"(result)
14294 : "w"(a), "w"(b)
14295 : /* No clobbers */);
14296 return result;
14299 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14300 vzip2q_s16 (int16x8_t a, int16x8_t b)
14302 int16x8_t result;
14303 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
14304 : "=w"(result)
14305 : "w"(a), "w"(b)
14306 : /* No clobbers */);
14307 return result;
14310 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14311 vzip2q_s32 (int32x4_t a, int32x4_t b)
14313 int32x4_t result;
14314 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
14315 : "=w"(result)
14316 : "w"(a), "w"(b)
14317 : /* No clobbers */);
14318 return result;
14321 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14322 vzip2q_s64 (int64x2_t a, int64x2_t b)
14324 int64x2_t result;
14325 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
14326 : "=w"(result)
14327 : "w"(a), "w"(b)
14328 : /* No clobbers */);
14329 return result;
14332 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14333 vzip2q_u8 (uint8x16_t a, uint8x16_t b)
14335 uint8x16_t result;
14336 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
14337 : "=w"(result)
14338 : "w"(a), "w"(b)
14339 : /* No clobbers */);
14340 return result;
14343 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14344 vzip2q_u16 (uint16x8_t a, uint16x8_t b)
14346 uint16x8_t result;
14347 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
14348 : "=w"(result)
14349 : "w"(a), "w"(b)
14350 : /* No clobbers */);
14351 return result;
14354 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14355 vzip2q_u32 (uint32x4_t a, uint32x4_t b)
14357 uint32x4_t result;
14358 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
14359 : "=w"(result)
14360 : "w"(a), "w"(b)
14361 : /* No clobbers */);
14362 return result;
14365 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14366 vzip2q_u64 (uint64x2_t a, uint64x2_t b)
14368 uint64x2_t result;
14369 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
14370 : "=w"(result)
14371 : "w"(a), "w"(b)
14372 : /* No clobbers */);
14373 return result;
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   This is required for casting memory reference.  */
/* Define a plain struct type t<sz>x<nelem>_t holding an array of nelem
   scalar elements; used below to give the dup/lane loads something to
   cast their memory operands to.  */
#define __STRUCTN(t, sz, nelem)			\
  typedef struct t ## sz ## x ## nelem ## _t {	\
    t ## sz ## _t val[nelem];			\
  }  t ## sz ## x ## nelem ## _t;
14430 /* 2-element structs. */
14431 __STRUCTN (int, 8, 2)
14432 __STRUCTN (int, 16, 2)
14433 __STRUCTN (uint, 8, 2)
14434 __STRUCTN (uint, 16, 2)
14435 __STRUCTN (poly, 8, 2)
14436 __STRUCTN (poly, 16, 2)
14437 /* 3-element structs. */
14438 __STRUCTN (int, 8, 3)
14439 __STRUCTN (int, 16, 3)
14440 __STRUCTN (int, 32, 3)
14441 __STRUCTN (int, 64, 3)
14442 __STRUCTN (uint, 8, 3)
14443 __STRUCTN (uint, 16, 3)
14444 __STRUCTN (uint, 32, 3)
14445 __STRUCTN (uint, 64, 3)
14446 __STRUCTN (float, 32, 3)
14447 __STRUCTN (float, 64, 3)
14448 __STRUCTN (poly, 8, 3)
14449 __STRUCTN (poly, 16, 3)
14450 /* 4-element structs. */
14451 __STRUCTN (int, 8, 4)
14452 __STRUCTN (int, 64, 4)
14453 __STRUCTN (uint, 8, 4)
14454 __STRUCTN (uint, 64, 4)
14455 __STRUCTN (poly, 8, 4)
14456 __STRUCTN (float, 64, 4)
14457 #undef __STRUCTN
/* Define vld2<Q>_dup_<funcsuffix>: load one element pair from memory and
   replicate each element across all lanes of a two-vector result (LD2R),
   then spill the pair through a memory temporary ("=Q") into the struct
   return value.  v16/v17 are used as scratch and listed as clobbers.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
14474 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
14475 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
14476 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
14477 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
14478 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
14479 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
14480 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
14481 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
14482 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
14483 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
14484 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
14485 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
14486 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
14487 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
14488 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
14489 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
14490 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
14491 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
14492 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
14493 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
14494 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
14495 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
14496 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
14497 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
/* Define vld2<Q>_lane_<funcsuffix>: load the two vectors of B, overwrite
   lane C of each from memory (LD2 to a single lane), and return the
   updated pair.  The vectors are shuffled through v16/v17 and a memory
   temporary because this is a temporary asm implementation.  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
14516 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
14517 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14518 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14519 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14520 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14521 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14522 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14523 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14524 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14525 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14526 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14527 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14528 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14529 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14530 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14531 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14532 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14533 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14534 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14535 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14536 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14537 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14538 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14539 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* Define vld3<Q>_dup_<funcsuffix>: load one element triple from memory and
   replicate each element across all lanes of a three-vector result (LD3R),
   spilling through a memory temporary ("=Q") into the struct return
   value.  v16-v18 are used as scratch and listed as clobbers.  */
#define __LD3R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }
14556 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
14557 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
14558 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
14559 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
14560 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
14561 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
14562 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
14563 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
14564 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
14565 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
14566 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
14567 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
14568 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
14569 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
14570 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
14571 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
14572 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
14573 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
14574 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
14575 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
14576 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
14577 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
14578 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
14579 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
/* Define vld3{q}_lane_<funcsuffix>: load one 3-element structure from PTR
   into lane C of the vector triple B, leaving the other lanes unchanged.
   The existing value of B is staged into the fixed registers v16-v18,
   the lane is overwritten with LD3, and the updated triple is returned
   through memory; v16-v18 are clobbered.  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     rettype b, const int c) \
  { \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
	     : "memory", "v16", "v17", "v18"); \
    return result; \
  }
14598 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
14599 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14600 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14601 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14602 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14603 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14604 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14605 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14606 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14607 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14608 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14609 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14610 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14611 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14612 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14613 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14614 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14615 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14616 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14617 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14618 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14619 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14620 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14621 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Define vld4{q}_dup_<funcsuffix>: load one 4-element structure from PTR
   and replicate it across all lanes of a quadruple of vectors.  The asm
   stages the data through the fixed registers v16-v19 and hands the
   result back through memory, so those registers are clobbered.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype, \
		    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
  { \
    rettype result; \
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(*(const structtype *)ptr) \
	     : "memory", "v16", "v17", "v18", "v19"); \
    return result; \
  }
14638 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
14639 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
14640 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
14641 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
14642 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
14643 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
14644 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
14645 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
14646 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
14647 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
14648 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
14649 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
14650 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
14651 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
14652 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
14653 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
14654 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
14655 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
14656 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
14657 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
14658 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
14659 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
14660 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
14661 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* Define vld4{q}_lane_<funcsuffix>: load one 4-element structure from PTR
   into lane C of the vector quadruple B, leaving other lanes unchanged.
   The existing value of B is staged into the fixed registers v16-v19,
   the lane is overwritten with LD4, and the updated quadruple is
   returned through memory; v16-v19 are clobbered.  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     rettype b, const int c) \
  { \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
	     : "memory", "v16", "v17", "v18", "v19"); \
    return result; \
  }
14680 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
14681 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14682 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14683 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14684 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14685 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14686 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14687 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14688 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14689 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14690 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14691 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14692 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14693 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14694 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14695 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14696 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14697 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14698 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14699 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14700 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14701 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14702 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14703 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* Define vst2{q}_lane_<funcsuffix>: store lane C of the vector pair B to
   PTR.  B is staged into the fixed registers v16-v17 and the selected
   lane is stored with ST2; v16-v17 are clobbered.  PTR is deliberately
   non-const: the function writes through it (the previous const
   qualifier was cast away inside the asm, which invited undefined
   behaviour on genuinely const objects; ACLE specifies a non-const
   pointer for vst2_lane).  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*(intype *) ptr) \
	     : "Q"(b), "i"(c) \
	     : "memory", "v16", "v17"); \
  }
14719 __ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
14720 __ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14721 __ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14722 __ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14723 __ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14724 __ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14725 __ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14726 __ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14727 __ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14728 __ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14729 __ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14730 __ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14731 __ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14732 __ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14733 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14734 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14735 __ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14736 __ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14737 __ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14738 __ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14739 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14740 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14741 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14742 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* Define vst3{q}_lane_<funcsuffix>: store lane C of the vector triple B
   to PTR.  B is staged into the fixed registers v16-v18 and the selected
   lane is stored with ST3; v16-v18 are clobbered.  PTR is non-const for
   the same reason as in __ST2_LANE_FUNC: the function writes through it.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*(intype *) ptr) \
	     : "Q"(b), "i"(c) \
	     : "memory", "v16", "v17", "v18"); \
  }
14758 __ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
14759 __ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14760 __ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14761 __ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14762 __ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14763 __ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14764 __ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14765 __ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14766 __ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14767 __ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14768 __ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14769 __ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14770 __ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14771 __ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14772 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14773 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14774 __ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14775 __ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14776 __ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14777 __ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14778 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14779 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14780 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14781 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Define vst4{q}_lane_<funcsuffix>: store lane C of the vector quadruple
   B to PTR.  B is staged into the fixed registers v16-v19 and the
   selected lane is stored with ST4; v16-v19 are clobbered.  PTR is
   non-const for the same reason as in __ST2_LANE_FUNC: the function
   writes through it.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*(intype *) ptr) \
	     : "Q"(b), "i"(c) \
	     : "memory", "v16", "v17", "v18", "v19"); \
  }
14797 __ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
14798 __ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14799 __ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14800 __ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14801 __ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14802 __ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14803 __ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14804 __ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14805 __ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14806 __ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14807 __ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14808 __ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14809 __ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14810 __ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14811 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14812 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14813 __ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14814 __ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14815 __ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14816 __ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14817 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14818 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14819 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14820 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
14822 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14823 vaddlv_s32 (int32x2_t a)
14825 int64_t result;
14826 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14827 return result;
14830 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14831 vaddlv_u32 (uint32x2_t a)
14833 uint64_t result;
14834 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14835 return result;
14838 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14839 vpaddd_s64 (int64x2_t __a)
14841 return __builtin_aarch64_addpdi (__a);
14844 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14845 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14847 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
14850 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14851 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14853 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
14856 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14857 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14859 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
14862 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14863 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14865 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
14868 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14869 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14871 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
14874 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14875 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14877 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
14880 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14881 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14883 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
14886 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14887 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14889 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
14892 /* Table intrinsics. */
14894 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14895 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
14897 poly8x8_t result;
14898 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14899 : "=w"(result)
14900 : "w"(a), "w"(b)
14901 : /* No clobbers */);
14902 return result;
14905 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14906 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
14908 int8x8_t result;
14909 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14910 : "=w"(result)
14911 : "w"(a), "w"(b)
14912 : /* No clobbers */);
14913 return result;
14916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14917 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
14919 uint8x8_t result;
14920 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14921 : "=w"(result)
14922 : "w"(a), "w"(b)
14923 : /* No clobbers */);
14924 return result;
14927 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14928 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
14930 poly8x16_t result;
14931 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14932 : "=w"(result)
14933 : "w"(a), "w"(b)
14934 : /* No clobbers */);
14935 return result;
14938 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14939 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
14941 int8x16_t result;
14942 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14943 : "=w"(result)
14944 : "w"(a), "w"(b)
14945 : /* No clobbers */);
14946 return result;
14949 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14950 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
14952 uint8x16_t result;
14953 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14954 : "=w"(result)
14955 : "w"(a), "w"(b)
14956 : /* No clobbers */);
14957 return result;
14960 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14961 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
14963 int8x8_t result;
14964 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14965 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14966 :"=w"(result)
14967 :"Q"(tab),"w"(idx)
14968 :"memory", "v16", "v17");
14969 return result;
14972 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14973 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
14975 uint8x8_t result;
14976 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14977 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14978 :"=w"(result)
14979 :"Q"(tab),"w"(idx)
14980 :"memory", "v16", "v17");
14981 return result;
14984 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14985 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
14987 poly8x8_t result;
14988 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14989 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14990 :"=w"(result)
14991 :"Q"(tab),"w"(idx)
14992 :"memory", "v16", "v17");
14993 return result;
14996 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14997 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
14999 int8x16_t result;
15000 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15001 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
15002 :"=w"(result)
15003 :"Q"(tab),"w"(idx)
15004 :"memory", "v16", "v17");
15005 return result;
15008 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15009 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
15011 uint8x16_t result;
15012 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15013 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
15014 :"=w"(result)
15015 :"Q"(tab),"w"(idx)
15016 :"memory", "v16", "v17");
15017 return result;
15020 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15021 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
15023 poly8x16_t result;
15024 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15025 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
15026 :"=w"(result)
15027 :"Q"(tab),"w"(idx)
15028 :"memory", "v16", "v17");
15029 return result;
15032 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15033 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
15035 int8x8_t result;
15036 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15037 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
15038 :"=w"(result)
15039 :"Q"(tab),"w"(idx)
15040 :"memory", "v16", "v17", "v18");
15041 return result;
15044 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15045 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
15047 uint8x8_t result;
15048 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15049 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
15050 :"=w"(result)
15051 :"Q"(tab),"w"(idx)
15052 :"memory", "v16", "v17", "v18");
15053 return result;
15056 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15057 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
15059 poly8x8_t result;
15060 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15061 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
15062 :"=w"(result)
15063 :"Q"(tab),"w"(idx)
15064 :"memory", "v16", "v17", "v18");
15065 return result;
15068 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15069 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
15071 int8x16_t result;
15072 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15073 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
15074 :"=w"(result)
15075 :"Q"(tab),"w"(idx)
15076 :"memory", "v16", "v17", "v18");
15077 return result;
15080 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15081 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
15083 uint8x16_t result;
15084 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15085 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
15086 :"=w"(result)
15087 :"Q"(tab),"w"(idx)
15088 :"memory", "v16", "v17", "v18");
15089 return result;
15092 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15093 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
15095 poly8x16_t result;
15096 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15097 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
15098 :"=w"(result)
15099 :"Q"(tab),"w"(idx)
15100 :"memory", "v16", "v17", "v18");
15101 return result;
15104 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15105 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
15107 int8x8_t result;
15108 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15109 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
15110 :"=w"(result)
15111 :"Q"(tab),"w"(idx)
15112 :"memory", "v16", "v17", "v18", "v19");
15113 return result;
15116 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15117 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
15119 uint8x8_t result;
15120 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15121 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
15122 :"=w"(result)
15123 :"Q"(tab),"w"(idx)
15124 :"memory", "v16", "v17", "v18", "v19");
15125 return result;
15128 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15129 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
15131 poly8x8_t result;
15132 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15133 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
15134 :"=w"(result)
15135 :"Q"(tab),"w"(idx)
15136 :"memory", "v16", "v17", "v18", "v19");
15137 return result;
15141 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15142 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
15144 int8x16_t result;
15145 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15146 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
15147 :"=w"(result)
15148 :"Q"(tab),"w"(idx)
15149 :"memory", "v16", "v17", "v18", "v19");
15150 return result;
15153 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15154 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
15156 uint8x16_t result;
15157 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15158 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
15159 :"=w"(result)
15160 :"Q"(tab),"w"(idx)
15161 :"memory", "v16", "v17", "v18", "v19");
15162 return result;
15165 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15166 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
15168 poly8x16_t result;
15169 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15170 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
15171 :"=w"(result)
15172 :"Q"(tab),"w"(idx)
15173 :"memory", "v16", "v17", "v18", "v19");
15174 return result;
15178 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15179 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
15181 int8x8_t result = r;
15182 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
15183 : "+w"(result)
15184 : "w"(tab), "w"(idx)
15185 : /* No clobbers */);
15186 return result;
15189 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15190 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
15192 uint8x8_t result = r;
15193 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
15194 : "+w"(result)
15195 : "w"(tab), "w"(idx)
15196 : /* No clobbers */);
15197 return result;
15200 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15201 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
15203 poly8x8_t result = r;
15204 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
15205 : "+w"(result)
15206 : "w"(tab), "w"(idx)
15207 : /* No clobbers */);
15208 return result;
15211 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15212 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
15214 int8x16_t result = r;
15215 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
15216 : "+w"(result)
15217 : "w"(tab), "w"(idx)
15218 : /* No clobbers */);
15219 return result;
15222 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15223 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
15225 uint8x16_t result = r;
15226 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
15227 : "+w"(result)
15228 : "w"(tab), "w"(idx)
15229 : /* No clobbers */);
15230 return result;
15233 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15234 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
15236 poly8x16_t result = r;
15237 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
15238 : "+w"(result)
15239 : "w"(tab), "w"(idx)
15240 : /* No clobbers */);
15241 return result;
15244 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15245 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
15247 int8x8_t result = r;
15248 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15249 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
15250 :"+w"(result)
15251 :"Q"(tab),"w"(idx)
15252 :"memory", "v16", "v17");
15253 return result;
15256 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15257 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
15259 uint8x8_t result = r;
15260 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15261 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
15262 :"+w"(result)
15263 :"Q"(tab),"w"(idx)
15264 :"memory", "v16", "v17");
15265 return result;
15268 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15269 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
15271 poly8x8_t result = r;
15272 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15273 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
15274 :"+w"(result)
15275 :"Q"(tab),"w"(idx)
15276 :"memory", "v16", "v17");
15277 return result;
15281 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15282 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
15284 int8x16_t result = r;
15285 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15286 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
15287 :"+w"(result)
15288 :"Q"(tab),"w"(idx)
15289 :"memory", "v16", "v17");
15290 return result;
15293 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15294 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
15296 uint8x16_t result = r;
15297 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15298 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
15299 :"+w"(result)
15300 :"Q"(tab),"w"(idx)
15301 :"memory", "v16", "v17");
15302 return result;
15305 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15306 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
15308 poly8x16_t result = r;
15309 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
15310 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
15311 :"+w"(result)
15312 :"Q"(tab),"w"(idx)
15313 :"memory", "v16", "v17");
15314 return result;
15318 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15319 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
15321 int8x8_t result = r;
15322 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15323 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
15324 :"+w"(result)
15325 :"Q"(tab),"w"(idx)
15326 :"memory", "v16", "v17", "v18");
15327 return result;
15330 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15331 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
15333 uint8x8_t result = r;
15334 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15335 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
15336 :"+w"(result)
15337 :"Q"(tab),"w"(idx)
15338 :"memory", "v16", "v17", "v18");
15339 return result;
15342 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15343 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
15345 poly8x8_t result = r;
15346 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15347 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
15348 :"+w"(result)
15349 :"Q"(tab),"w"(idx)
15350 :"memory", "v16", "v17", "v18");
15351 return result;
15355 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15356 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
15358 int8x16_t result = r;
15359 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15360 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
15361 :"+w"(result)
15362 :"Q"(tab),"w"(idx)
15363 :"memory", "v16", "v17", "v18");
15364 return result;
15367 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15368 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
15370 uint8x16_t result = r;
15371 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15372 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
15373 :"+w"(result)
15374 :"Q"(tab),"w"(idx)
15375 :"memory", "v16", "v17", "v18");
15376 return result;
15379 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15380 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
15382 poly8x16_t result = r;
15383 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
15384 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
15385 :"+w"(result)
15386 :"Q"(tab),"w"(idx)
15387 :"memory", "v16", "v17", "v18");
15388 return result;
15392 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15393 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
15395 int8x8_t result = r;
15396 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15397 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
15398 :"+w"(result)
15399 :"Q"(tab),"w"(idx)
15400 :"memory", "v16", "v17", "v18", "v19");
15401 return result;
15404 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15405 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
15407 uint8x8_t result = r;
15408 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15409 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
15410 :"+w"(result)
15411 :"Q"(tab),"w"(idx)
15412 :"memory", "v16", "v17", "v18", "v19");
15413 return result;
15416 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15417 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
15419 poly8x8_t result = r;
15420 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15421 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
15422 :"+w"(result)
15423 :"Q"(tab),"w"(idx)
15424 :"memory", "v16", "v17", "v18", "v19");
15425 return result;
15429 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15430 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
15432 int8x16_t result = r;
15433 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15434 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
15435 :"+w"(result)
15436 :"Q"(tab),"w"(idx)
15437 :"memory", "v16", "v17", "v18", "v19");
15438 return result;
15441 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15442 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
15444 uint8x16_t result = r;
15445 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15446 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
15447 :"+w"(result)
15448 :"Q"(tab),"w"(idx)
15449 :"memory", "v16", "v17", "v18", "v19");
15450 return result;
15453 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15454 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
15456 poly8x16_t result = r;
15457 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
15458 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
15459 :"+w"(result)
15460 :"Q"(tab),"w"(idx)
15461 :"memory", "v16", "v17", "v18", "v19");
15462 return result;
15465 /* V7 legacy table intrinsics. */
15467 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15468 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
15470 int8x8_t result;
15471 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
15472 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15473 : "=w"(result)
15474 : "w"(temp), "w"(idx)
15475 : /* No clobbers */);
15476 return result;
15479 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15480 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
15482 uint8x8_t result;
15483 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
15484 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15485 : "=w"(result)
15486 : "w"(temp), "w"(idx)
15487 : /* No clobbers */);
15488 return result;
15491 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15492 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
15494 poly8x8_t result;
15495 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15496 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15497 : "=w"(result)
15498 : "w"(temp), "w"(idx)
15499 : /* No clobbers */);
15500 return result;
15503 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15504 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
15506 int8x8_t result;
15507 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15508 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15509 : "=w"(result)
15510 : "w"(temp), "w"(idx)
15511 : /* No clobbers */);
15512 return result;
15515 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15516 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
15518 uint8x8_t result;
15519 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15520 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15521 : "=w"(result)
15522 : "w"(temp), "w"(idx)
15523 : /* No clobbers */);
15524 return result;
15527 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15528 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
15530 poly8x8_t result;
15531 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15532 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15533 : "=w"(result)
15534 : "w"(temp), "w"(idx)
15535 : /* No clobbers */);
15536 return result;
15539 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15540 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
15542 int8x8_t result;
15543 int8x16x2_t temp;
15544 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15545 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
15546 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15547 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15548 : "=w"(result)
15549 : "Q"(temp), "w"(idx)
15550 : "v16", "v17", "memory");
15551 return result;
15554 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15555 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
15557 uint8x8_t result;
15558 uint8x16x2_t temp;
15559 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15560 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
15561 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15562 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15563 : "=w"(result)
15564 : "Q"(temp), "w"(idx)
15565 : "v16", "v17", "memory");
15566 return result;
15569 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15570 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
15572 poly8x8_t result;
15573 poly8x16x2_t temp;
15574 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15575 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15576 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15577 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15578 : "=w"(result)
15579 : "Q"(temp), "w"(idx)
15580 : "v16", "v17", "memory");
15581 return result;
15584 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15585 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
15587 int8x8_t result;
15588 int8x16x2_t temp;
15589 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15590 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15591 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15592 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15593 : "=w"(result)
15594 : "Q"(temp), "w"(idx)
15595 : "v16", "v17", "memory");
15596 return result;
15599 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15600 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
15602 uint8x8_t result;
15603 uint8x16x2_t temp;
15604 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15605 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15606 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15607 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15608 : "=w"(result)
15609 : "Q"(temp), "w"(idx)
15610 : "v16", "v17", "memory");
15611 return result;
15614 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15615 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
15617 poly8x8_t result;
15618 poly8x16x2_t temp;
15619 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15620 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15621 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15622 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15623 : "=w"(result)
15624 : "Q"(temp), "w"(idx)
15625 : "v16", "v17", "memory");
15626 return result;
15629 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15630 vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
15632 int8x8_t result;
15633 int8x8_t tmp1;
15634 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
15635 __asm__ ("movi %0.8b, 8\n\t"
15636 "cmhs %0.8b, %3.8b, %0.8b\n\t"
15637 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
15638 "bsl %0.8b, %4.8b, %1.8b\n\t"
15639 : "+w"(result), "=&w"(tmp1)
15640 : "w"(temp), "w"(idx), "w"(r)
15641 : /* No clobbers */);
15642 return result;
15645 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15646 vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
15648 uint8x8_t result;
15649 uint8x8_t tmp1;
15650 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
15651 __asm__ ("movi %0.8b, 8\n\t"
15652 "cmhs %0.8b, %3.8b, %0.8b\n\t"
15653 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
15654 "bsl %0.8b, %4.8b, %1.8b\n\t"
15655 : "+w"(result), "=&w"(tmp1)
15656 : "w"(temp), "w"(idx), "w"(r)
15657 : /* No clobbers */);
15658 return result;
15661 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15662 vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
15664 poly8x8_t result;
15665 poly8x8_t tmp1;
15666 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15667 __asm__ ("movi %0.8b, 8\n\t"
15668 "cmhs %0.8b, %3.8b, %0.8b\n\t"
15669 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
15670 "bsl %0.8b, %4.8b, %1.8b\n\t"
15671 : "+w"(result), "=&w"(tmp1)
15672 : "w"(temp), "w"(idx), "w"(r)
15673 : /* No clobbers */);
15674 return result;
15677 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15678 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
15680 int8x8_t result = r;
15681 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15682 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15683 : "+w"(result)
15684 : "w"(temp), "w"(idx)
15685 : /* No clobbers */);
15686 return result;
15689 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15690 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
15692 uint8x8_t result = r;
15693 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15694 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15695 : "+w"(result)
15696 : "w"(temp), "w"(idx)
15697 : /* No clobbers */);
15698 return result;
15701 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15702 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
15704 poly8x8_t result = r;
15705 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15706 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15707 : "+w"(result)
15708 : "w"(temp), "w"(idx)
15709 : /* No clobbers */);
15710 return result;
15713 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15714 vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
15716 int8x8_t result;
15717 int8x8_t tmp1;
15718 int8x16x2_t temp;
15719 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15720 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
15721 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
15722 "movi %0.8b, 24\n\t"
15723 "cmhs %0.8b, %3.8b, %0.8b\n\t"
15724 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
15725 "bsl %0.8b, %4.8b, %1.8b\n\t"
15726 : "+w"(result), "=&w"(tmp1)
15727 : "Q"(temp), "w"(idx), "w"(r)
15728 : "v16", "v17", "memory");
15729 return result;
15732 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15733 vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
15735 uint8x8_t result;
15736 uint8x8_t tmp1;
15737 uint8x16x2_t temp;
15738 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15739 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
15740 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
15741 "movi %0.8b, 24\n\t"
15742 "cmhs %0.8b, %3.8b, %0.8b\n\t"
15743 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
15744 "bsl %0.8b, %4.8b, %1.8b\n\t"
15745 : "+w"(result), "=&w"(tmp1)
15746 : "Q"(temp), "w"(idx), "w"(r)
15747 : "v16", "v17", "memory");
15748 return result;
15751 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15752 vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
15754 poly8x8_t result;
15755 poly8x8_t tmp1;
15756 poly8x16x2_t temp;
15757 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15758 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15759 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
15760 "movi %0.8b, 24\n\t"
15761 "cmhs %0.8b, %3.8b, %0.8b\n\t"
15762 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
15763 "bsl %0.8b, %4.8b, %1.8b\n\t"
15764 : "+w"(result), "=&w"(tmp1)
15765 : "Q"(temp), "w"(idx), "w"(r)
15766 : "v16", "v17", "memory");
15767 return result;
15770 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15771 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
15773 int8x8_t result = r;
15774 int8x16x2_t temp;
15775 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15776 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15777 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15778 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15779 : "+w"(result)
15780 : "Q"(temp), "w"(idx)
15781 : "v16", "v17", "memory");
15782 return result;
15785 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15786 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
15788 uint8x8_t result = r;
15789 uint8x16x2_t temp;
15790 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15791 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15792 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15793 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15794 : "+w"(result)
15795 : "Q"(temp), "w"(idx)
15796 : "v16", "v17", "memory");
15797 return result;
15800 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15801 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
15803 poly8x8_t result = r;
15804 poly8x16x2_t temp;
15805 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15806 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15807 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15808 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15809 : "+w"(result)
15810 : "Q"(temp), "w"(idx)
15811 : "v16", "v17", "memory");
15812 return result;
15815 /* End of temporary inline asm. */
15817 /* Start of optimal implementations in approved order. */
15819 /* vabs */
15821 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15822 vabs_f32 (float32x2_t __a)
15824 return __builtin_aarch64_absv2sf (__a);
15827 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15828 vabs_f64 (float64x1_t __a)
15830 return __builtin_fabs (__a);
15833 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15834 vabs_s8 (int8x8_t __a)
15836 return __builtin_aarch64_absv8qi (__a);
15839 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15840 vabs_s16 (int16x4_t __a)
15842 return __builtin_aarch64_absv4hi (__a);
15845 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15846 vabs_s32 (int32x2_t __a)
15848 return __builtin_aarch64_absv2si (__a);
15851 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15852 vabs_s64 (int64x1_t __a)
15854 return __builtin_llabs (__a);
15857 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15858 vabsq_f32 (float32x4_t __a)
15860 return __builtin_aarch64_absv4sf (__a);
15863 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15864 vabsq_f64 (float64x2_t __a)
15866 return __builtin_aarch64_absv2df (__a);
15869 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15870 vabsq_s8 (int8x16_t __a)
15872 return __builtin_aarch64_absv16qi (__a);
15875 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15876 vabsq_s16 (int16x8_t __a)
15878 return __builtin_aarch64_absv8hi (__a);
15881 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15882 vabsq_s32 (int32x4_t __a)
15884 return __builtin_aarch64_absv4si (__a);
15887 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15888 vabsq_s64 (int64x2_t __a)
15890 return __builtin_aarch64_absv2di (__a);
15893 /* vadd */
15895 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15896 vaddd_s64 (int64x1_t __a, int64x1_t __b)
15898 return __a + __b;
15901 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15902 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
15904 return __a + __b;
15907 /* vaddv */
15909 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15910 vaddv_s8 (int8x8_t __a)
15912 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
15915 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15916 vaddv_s16 (int16x4_t __a)
15918 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
15921 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15922 vaddv_s32 (int32x2_t __a)
15924 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
15927 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15928 vaddv_u8 (uint8x8_t __a)
15930 return vget_lane_u8 ((uint8x8_t)
15931 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
15934 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15935 vaddv_u16 (uint16x4_t __a)
15937 return vget_lane_u16 ((uint16x4_t)
15938 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
15941 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15942 vaddv_u32 (uint32x2_t __a)
15944 return vget_lane_u32 ((uint32x2_t)
15945 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
15948 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15949 vaddvq_s8 (int8x16_t __a)
15951 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
15954 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15955 vaddvq_s16 (int16x8_t __a)
15957 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
15960 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15961 vaddvq_s32 (int32x4_t __a)
15963 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
15966 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15967 vaddvq_s64 (int64x2_t __a)
15969 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
15972 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15973 vaddvq_u8 (uint8x16_t __a)
15975 return vgetq_lane_u8 ((uint8x16_t)
15976 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
15979 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15980 vaddvq_u16 (uint16x8_t __a)
15982 return vgetq_lane_u16 ((uint16x8_t)
15983 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
15986 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15987 vaddvq_u32 (uint32x4_t __a)
15989 return vgetq_lane_u32 ((uint32x4_t)
15990 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
15993 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15994 vaddvq_u64 (uint64x2_t __a)
15996 return vgetq_lane_u64 ((uint64x2_t)
15997 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
16000 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16001 vaddv_f32 (float32x2_t __a)
16003 float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
16004 return vget_lane_f32 (t, 0);
16007 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16008 vaddvq_f32 (float32x4_t __a)
16010 float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
16011 return vgetq_lane_f32 (t, 0);
16014 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16015 vaddvq_f64 (float64x2_t __a)
16017 float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
16018 return vgetq_lane_f64 (t, 0);
16021 /* vcage */
16023 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16024 vcages_f32 (float32_t __a, float32_t __b)
16026 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
16029 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16030 vcage_f32 (float32x2_t __a, float32x2_t __b)
16032 return vabs_f32 (__a) >= vabs_f32 (__b);
16035 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16036 vcageq_f32 (float32x4_t __a, float32x4_t __b)
16038 return vabsq_f32 (__a) >= vabsq_f32 (__b);
16041 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16042 vcaged_f64 (float64_t __a, float64_t __b)
16044 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
16047 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16048 vcageq_f64 (float64x2_t __a, float64x2_t __b)
16050 return vabsq_f64 (__a) >= vabsq_f64 (__b);
16053 /* vcagt */
16055 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16056 vcagts_f32 (float32_t __a, float32_t __b)
16058 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
16061 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16062 vcagt_f32 (float32x2_t __a, float32x2_t __b)
16064 return vabs_f32 (__a) > vabs_f32 (__b);
16067 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16068 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
16070 return vabsq_f32 (__a) > vabsq_f32 (__b);
16073 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16074 vcagtd_f64 (float64_t __a, float64_t __b)
16076 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
16079 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16080 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
16082 return vabsq_f64 (__a) > vabsq_f64 (__b);
16085 /* vcale */
16087 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16088 vcale_f32 (float32x2_t __a, float32x2_t __b)
16090 return vabs_f32 (__a) <= vabs_f32 (__b);
16093 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16094 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
16096 return vabsq_f32 (__a) <= vabsq_f32 (__b);
16099 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16100 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
16102 return vabsq_f64 (__a) <= vabsq_f64 (__b);
16105 /* vcalt */
16107 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16108 vcalt_f32 (float32x2_t __a, float32x2_t __b)
16110 return vabs_f32 (__a) < vabs_f32 (__b);
16113 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16114 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
16116 return vabsq_f32 (__a) < vabsq_f32 (__b);
16119 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16120 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
16122 return vabsq_f64 (__a) < vabsq_f64 (__b);
16125 /* vceq - vector. */
16127 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16128 vceq_f32 (float32x2_t __a, float32x2_t __b)
16130 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
16133 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16134 vceq_f64 (float64x1_t __a, float64x1_t __b)
16136 return __a == __b ? -1ll : 0ll;
16139 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16140 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
16142 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
16143 (int8x8_t) __b);
16146 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16147 vceq_s8 (int8x8_t __a, int8x8_t __b)
16149 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
16152 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16153 vceq_s16 (int16x4_t __a, int16x4_t __b)
16155 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
16158 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16159 vceq_s32 (int32x2_t __a, int32x2_t __b)
16161 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
16164 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16165 vceq_s64 (int64x1_t __a, int64x1_t __b)
16167 return __a == __b ? -1ll : 0ll;
16170 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16171 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
16173 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
16174 (int8x8_t) __b);
16177 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16178 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
16180 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
16181 (int16x4_t) __b);
16184 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16185 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
16187 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
16188 (int32x2_t) __b);
16191 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16192 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
16194 return __a == __b ? -1ll : 0ll;
16197 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16198 vceqq_f32 (float32x4_t __a, float32x4_t __b)
16200 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
16203 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16204 vceqq_f64 (float64x2_t __a, float64x2_t __b)
16206 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
16209 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16210 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
16212 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16213 (int8x16_t) __b);
16216 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16217 vceqq_s8 (int8x16_t __a, int8x16_t __b)
16219 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
16222 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16223 vceqq_s16 (int16x8_t __a, int16x8_t __b)
16225 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
16228 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16229 vceqq_s32 (int32x4_t __a, int32x4_t __b)
16231 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
16234 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16235 vceqq_s64 (int64x2_t __a, int64x2_t __b)
16237 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
16240 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16241 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
16243 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16244 (int8x16_t) __b);
16247 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16248 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
16250 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
16251 (int16x8_t) __b);
16254 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16255 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
16257 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
16258 (int32x4_t) __b);
16261 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16262 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
16264 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
16265 (int64x2_t) __b);
16268 /* vceq - scalar. */
16270 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16271 vceqs_f32 (float32_t __a, float32_t __b)
16273 return __a == __b ? -1 : 0;
16276 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16277 vceqd_s64 (int64x1_t __a, int64x1_t __b)
16279 return __a == __b ? -1ll : 0ll;
16282 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16283 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
16285 return __a == __b ? -1ll : 0ll;
16288 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16289 vceqd_f64 (float64_t __a, float64_t __b)
16291 return __a == __b ? -1ll : 0ll;
16294 /* vceqz - vector. */
16296 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16297 vceqz_f32 (float32x2_t __a)
16299 float32x2_t __b = {0.0f, 0.0f};
16300 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
16303 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16304 vceqz_f64 (float64x1_t __a)
16306 return __a == 0.0 ? -1ll : 0ll;
16309 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16310 vceqz_p8 (poly8x8_t __a)
16312 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16313 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
16314 (int8x8_t) __b);
16317 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16318 vceqz_s8 (int8x8_t __a)
16320 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16321 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
16324 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16325 vceqz_s16 (int16x4_t __a)
16327 int16x4_t __b = {0, 0, 0, 0};
16328 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
16331 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16332 vceqz_s32 (int32x2_t __a)
16334 int32x2_t __b = {0, 0};
16335 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
16338 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16339 vceqz_s64 (int64x1_t __a)
16341 return __a == 0ll ? -1ll : 0ll;
16344 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16345 vceqz_u8 (uint8x8_t __a)
16347 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16348 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
16349 (int8x8_t) __b);
16352 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16353 vceqz_u16 (uint16x4_t __a)
16355 uint16x4_t __b = {0, 0, 0, 0};
16356 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
16357 (int16x4_t) __b);
16360 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16361 vceqz_u32 (uint32x2_t __a)
16363 uint32x2_t __b = {0, 0};
16364 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
16365 (int32x2_t) __b);
16368 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16369 vceqz_u64 (uint64x1_t __a)
16371 return __a == 0ll ? -1ll : 0ll;
16374 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16375 vceqzq_f32 (float32x4_t __a)
16377 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16378 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
16381 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16382 vceqzq_f64 (float64x2_t __a)
16384 float64x2_t __b = {0.0, 0.0};
16385 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
16388 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16389 vceqzq_p8 (poly8x16_t __a)
16391 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16392 0, 0, 0, 0, 0, 0, 0, 0};
16393 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16394 (int8x16_t) __b);
16397 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16398 vceqzq_s8 (int8x16_t __a)
16400 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16401 0, 0, 0, 0, 0, 0, 0, 0};
16402 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
16405 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16406 vceqzq_s16 (int16x8_t __a)
16408 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16409 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
16412 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16413 vceqzq_s32 (int32x4_t __a)
16415 int32x4_t __b = {0, 0, 0, 0};
16416 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
16419 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16420 vceqzq_s64 (int64x2_t __a)
16422 int64x2_t __b = {0, 0};
16423 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
16426 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16427 vceqzq_u8 (uint8x16_t __a)
16429 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16430 0, 0, 0, 0, 0, 0, 0, 0};
16431 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16432 (int8x16_t) __b);
16435 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16436 vceqzq_u16 (uint16x8_t __a)
16438 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16439 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
16440 (int16x8_t) __b);
16443 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16444 vceqzq_u32 (uint32x4_t __a)
16446 uint32x4_t __b = {0, 0, 0, 0};
16447 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
16448 (int32x4_t) __b);
16451 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16452 vceqzq_u64 (uint64x2_t __a)
16454 uint64x2_t __b = {0, 0};
16455 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
16456 (int64x2_t) __b);
16459 /* vceqz - scalar. */
16461 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16462 vceqzs_f32 (float32_t __a)
16464 return __a == 0.0f ? -1 : 0;
16467 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16468 vceqzd_s64 (int64x1_t __a)
16470 return __a == 0 ? -1ll : 0ll;
16473 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16474 vceqzd_u64 (int64x1_t __a)
16476 return __a == 0 ? -1ll : 0ll;
16479 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16480 vceqzd_f64 (float64_t __a)
16482 return __a == 0.0 ? -1ll : 0ll;
16485 /* vcge - vector. */
16487 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16488 vcge_f32 (float32x2_t __a, float32x2_t __b)
16490 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16493 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16494 vcge_f64 (float64x1_t __a, float64x1_t __b)
16496 return __a >= __b ? -1ll : 0ll;
16499 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16500 vcge_p8 (poly8x8_t __a, poly8x8_t __b)
16502 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16503 (int8x8_t) __b);
16506 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16507 vcge_s8 (int8x8_t __a, int8x8_t __b)
16509 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16512 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16513 vcge_s16 (int16x4_t __a, int16x4_t __b)
16515 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16518 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16519 vcge_s32 (int32x2_t __a, int32x2_t __b)
16521 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16524 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16525 vcge_s64 (int64x1_t __a, int64x1_t __b)
16527 return __a >= __b ? -1ll : 0ll;
16530 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16531 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
16533 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16534 (int8x8_t) __b);
16537 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16538 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
16540 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16541 (int16x4_t) __b);
16544 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16545 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
16547 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16548 (int32x2_t) __b);
16551 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16552 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
16554 return __a >= __b ? -1ll : 0ll;
16557 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16558 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
16560 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16563 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16564 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
16566 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16569 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16570 vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
16572 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16573 (int8x16_t) __b);
16576 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16577 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
16579 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16582 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16583 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
16585 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16588 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16589 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
16591 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16594 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16595 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
16597 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16600 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16601 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
16603 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16604 (int8x16_t) __b);
16607 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16608 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
16610 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16611 (int16x8_t) __b);
16614 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16615 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
16617 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16618 (int32x4_t) __b);
16621 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16622 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
16624 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16625 (int64x2_t) __b);
16628 /* vcge - scalar. */
16630 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16631 vcges_f32 (float32_t __a, float32_t __b)
16633 return __a >= __b ? -1 : 0;
16636 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16637 vcged_s64 (int64x1_t __a, int64x1_t __b)
16639 return __a >= __b ? -1ll : 0ll;
16642 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16643 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
16645 return __a >= __b ? -1ll : 0ll;
16648 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16649 vcged_f64 (float64_t __a, float64_t __b)
16651 return __a >= __b ? -1ll : 0ll;
16654 /* vcgez - vector. */
16656 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16657 vcgez_f32 (float32x2_t __a)
16659 float32x2_t __b = {0.0f, 0.0f};
16660 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16663 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16664 vcgez_f64 (float64x1_t __a)
16666 return __a >= 0.0 ? -1ll : 0ll;
16669 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16670 vcgez_p8 (poly8x8_t __a)
16672 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16673 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16674 (int8x8_t) __b);
16677 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16678 vcgez_s8 (int8x8_t __a)
16680 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16681 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16684 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16685 vcgez_s16 (int16x4_t __a)
16687 int16x4_t __b = {0, 0, 0, 0};
16688 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16691 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16692 vcgez_s32 (int32x2_t __a)
16694 int32x2_t __b = {0, 0};
16695 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16698 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16699 vcgez_s64 (int64x1_t __a)
16701 return __a >= 0ll ? -1ll : 0ll;
16704 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16705 vcgez_u8 (uint8x8_t __a)
16707 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16708 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16709 (int8x8_t) __b);
16712 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16713 vcgez_u16 (uint16x4_t __a)
16715 uint16x4_t __b = {0, 0, 0, 0};
16716 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16717 (int16x4_t) __b);
16720 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16721 vcgez_u32 (uint32x2_t __a)
16723 uint32x2_t __b = {0, 0};
16724 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16725 (int32x2_t) __b);
16728 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16729 vcgez_u64 (uint64x1_t __a)
16731 return __a >= 0ll ? -1ll : 0ll;
16734 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16735 vcgezq_f32 (float32x4_t __a)
16737 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16738 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16741 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16742 vcgezq_f64 (float64x2_t __a)
16744 float64x2_t __b = {0.0, 0.0};
16745 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16748 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16749 vcgezq_p8 (poly8x16_t __a)
16751 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16752 0, 0, 0, 0, 0, 0, 0, 0};
16753 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16754 (int8x16_t) __b);
16757 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16758 vcgezq_s8 (int8x16_t __a)
16760 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16761 0, 0, 0, 0, 0, 0, 0, 0};
16762 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16765 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16766 vcgezq_s16 (int16x8_t __a)
16768 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16769 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16772 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16773 vcgezq_s32 (int32x4_t __a)
16775 int32x4_t __b = {0, 0, 0, 0};
16776 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16779 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16780 vcgezq_s64 (int64x2_t __a)
16782 int64x2_t __b = {0, 0};
16783 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16786 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16787 vcgezq_u8 (uint8x16_t __a)
16789 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16790 0, 0, 0, 0, 0, 0, 0, 0};
16791 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16792 (int8x16_t) __b);
16795 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16796 vcgezq_u16 (uint16x8_t __a)
16798 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16799 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16800 (int16x8_t) __b);
16803 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16804 vcgezq_u32 (uint32x4_t __a)
16806 uint32x4_t __b = {0, 0, 0, 0};
16807 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16808 (int32x4_t) __b);
16811 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16812 vcgezq_u64 (uint64x2_t __a)
16814 uint64x2_t __b = {0, 0};
16815 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16816 (int64x2_t) __b);
16819 /* vcgez - scalar. */
16821 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16822 vcgezs_f32 (float32_t __a)
16824 return __a >= 0.0f ? -1 : 0;
16827 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16828 vcgezd_s64 (int64x1_t __a)
16830 return __a >= 0 ? -1ll : 0ll;
16833 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16834 vcgezd_u64 (int64x1_t __a)
16836 return __a >= 0 ? -1ll : 0ll;
16839 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16840 vcgezd_f64 (float64_t __a)
16842 return __a >= 0.0 ? -1ll : 0ll;
16845 /* vcgt - vector. */
16847 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16848 vcgt_f32 (float32x2_t __a, float32x2_t __b)
16850 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16853 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16854 vcgt_f64 (float64x1_t __a, float64x1_t __b)
16856 return __a > __b ? -1ll : 0ll;
16859 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16860 vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
16862 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16863 (int8x8_t) __b);
16866 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16867 vcgt_s8 (int8x8_t __a, int8x8_t __b)
16869 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16872 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16873 vcgt_s16 (int16x4_t __a, int16x4_t __b)
16875 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16878 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16879 vcgt_s32 (int32x2_t __a, int32x2_t __b)
16881 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16884 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16885 vcgt_s64 (int64x1_t __a, int64x1_t __b)
16887 return __a > __b ? -1ll : 0ll;
16890 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16891 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
16893 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16894 (int8x8_t) __b);
16897 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16898 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
16900 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16901 (int16x4_t) __b);
16904 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16905 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
16907 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16908 (int32x2_t) __b);
16911 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16912 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
16914 return __a > __b ? -1ll : 0ll;
16917 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16918 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
16920 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16923 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16924 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
16926 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16929 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16930 vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
16932 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16933 (int8x16_t) __b);
16936 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16937 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
16939 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16942 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16943 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
16945 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16948 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16949 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
16951 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16954 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16955 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
16957 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16960 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16961 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
16963 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16964 (int8x16_t) __b);
16967 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16968 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
16970 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16971 (int16x8_t) __b);
16974 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16975 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
16977 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16978 (int32x4_t) __b);
16981 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16982 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
16984 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16985 (int64x2_t) __b);
16988 /* vcgt - scalar. */
16990 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16991 vcgts_f32 (float32_t __a, float32_t __b)
16993 return __a > __b ? -1 : 0;
16996 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16997 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
16999 return __a > __b ? -1ll : 0ll;
17002 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17003 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
17005 return __a > __b ? -1ll : 0ll;
17008 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17009 vcgtd_f64 (float64_t __a, float64_t __b)
17011 return __a > __b ? -1ll : 0ll;
17014 /* vcgtz - vector. */
17016 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17017 vcgtz_f32 (float32x2_t __a)
17019 float32x2_t __b = {0.0f, 0.0f};
17020 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
17023 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17024 vcgtz_f64 (float64x1_t __a)
17026 return __a > 0.0 ? -1ll : 0ll;
17029 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17030 vcgtz_p8 (poly8x8_t __a)
17032 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17033 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
17034 (int8x8_t) __b);
17037 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17038 vcgtz_s8 (int8x8_t __a)
17040 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17041 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
17044 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17045 vcgtz_s16 (int16x4_t __a)
17047 int16x4_t __b = {0, 0, 0, 0};
17048 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
17051 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17052 vcgtz_s32 (int32x2_t __a)
17054 int32x2_t __b = {0, 0};
17055 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
17058 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17059 vcgtz_s64 (int64x1_t __a)
17061 return __a > 0ll ? -1ll : 0ll;
17064 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17065 vcgtz_u8 (uint8x8_t __a)
17067 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17068 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
17069 (int8x8_t) __b);
17072 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17073 vcgtz_u16 (uint16x4_t __a)
17075 uint16x4_t __b = {0, 0, 0, 0};
17076 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
17077 (int16x4_t) __b);
17080 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17081 vcgtz_u32 (uint32x2_t __a)
17083 uint32x2_t __b = {0, 0};
17084 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
17085 (int32x2_t) __b);
17088 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17089 vcgtz_u64 (uint64x1_t __a)
17091 return __a > 0ll ? -1ll : 0ll;
17094 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17095 vcgtzq_f32 (float32x4_t __a)
17097 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17098 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
17101 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17102 vcgtzq_f64 (float64x2_t __a)
17104 float64x2_t __b = {0.0, 0.0};
17105 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
17108 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17109 vcgtzq_p8 (poly8x16_t __a)
17111 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17112 0, 0, 0, 0, 0, 0, 0, 0};
17113 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
17114 (int8x16_t) __b);
17117 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17118 vcgtzq_s8 (int8x16_t __a)
17120 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17121 0, 0, 0, 0, 0, 0, 0, 0};
17122 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
17125 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17126 vcgtzq_s16 (int16x8_t __a)
17128 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17129 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
17132 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17133 vcgtzq_s32 (int32x4_t __a)
17135 int32x4_t __b = {0, 0, 0, 0};
17136 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
17139 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17140 vcgtzq_s64 (int64x2_t __a)
17142 int64x2_t __b = {0, 0};
17143 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
17146 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17147 vcgtzq_u8 (uint8x16_t __a)
17149 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17150 0, 0, 0, 0, 0, 0, 0, 0};
17151 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
17152 (int8x16_t) __b);
17155 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17156 vcgtzq_u16 (uint16x8_t __a)
17158 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17159 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
17160 (int16x8_t) __b);
17163 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17164 vcgtzq_u32 (uint32x4_t __a)
17166 uint32x4_t __b = {0, 0, 0, 0};
17167 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
17168 (int32x4_t) __b);
17171 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17172 vcgtzq_u64 (uint64x2_t __a)
17174 uint64x2_t __b = {0, 0};
17175 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
17176 (int64x2_t) __b);
17179 /* vcgtz - scalar. */
17181 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17182 vcgtzs_f32 (float32_t __a)
17184 return __a > 0.0f ? -1 : 0;
17187 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17188 vcgtzd_s64 (int64x1_t __a)
17190 return __a > 0 ? -1ll : 0ll;
17193 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17194 vcgtzd_u64 (int64x1_t __a)
17196 return __a > 0 ? -1ll : 0ll;
17199 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17200 vcgtzd_f64 (float64_t __a)
17202 return __a > 0.0 ? -1ll : 0ll;
17205 /* vcle - vector. */
17207 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17208 vcle_f32 (float32x2_t __a, float32x2_t __b)
17210 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
17213 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17214 vcle_f64 (float64x1_t __a, float64x1_t __b)
17216 return __a <= __b ? -1ll : 0ll;
17219 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17220 vcle_p8 (poly8x8_t __a, poly8x8_t __b)
17222 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
17223 (int8x8_t) __a);
17226 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17227 vcle_s8 (int8x8_t __a, int8x8_t __b)
17229 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
17232 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17233 vcle_s16 (int16x4_t __a, int16x4_t __b)
17235 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
17238 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17239 vcle_s32 (int32x2_t __a, int32x2_t __b)
17241 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
17244 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17245 vcle_s64 (int64x1_t __a, int64x1_t __b)
17247 return __a <= __b ? -1ll : 0ll;
17250 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17251 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
17253 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
17254 (int8x8_t) __a);
17257 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17258 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
17260 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
17261 (int16x4_t) __a);
17264 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17265 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
17267 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
17268 (int32x2_t) __a);
17271 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17272 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
17274 return __a <= __b ? -1ll : 0ll;
17277 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17278 vcleq_f32 (float32x4_t __a, float32x4_t __b)
17280 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
17283 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17284 vcleq_f64 (float64x2_t __a, float64x2_t __b)
17286 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
17289 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17290 vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
17292 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
17293 (int8x16_t) __a);
17296 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17297 vcleq_s8 (int8x16_t __a, int8x16_t __b)
17299 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
17302 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17303 vcleq_s16 (int16x8_t __a, int16x8_t __b)
17305 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
17308 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17309 vcleq_s32 (int32x4_t __a, int32x4_t __b)
17311 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
17314 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17315 vcleq_s64 (int64x2_t __a, int64x2_t __b)
17317 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
17320 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17321 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
17323 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
17324 (int8x16_t) __a);
17327 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17328 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
17330 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
17331 (int16x8_t) __a);
17334 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17335 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
17337 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
17338 (int32x4_t) __a);
17341 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17342 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
17344 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
17345 (int64x2_t) __a);
17348 /* vcle - scalar. */
17350 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17351 vcles_f32 (float32_t __a, float32_t __b)
17353 return __a <= __b ? -1 : 0;
17356 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17357 vcled_s64 (int64x1_t __a, int64x1_t __b)
17359 return __a <= __b ? -1ll : 0ll;
17362 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17363 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
17365 return __a <= __b ? -1ll : 0ll;
17368 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17369 vcled_f64 (float64_t __a, float64_t __b)
17371 return __a <= __b ? -1ll : 0ll;
17374 /* vclez - vector. */
17376 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17377 vclez_f32 (float32x2_t __a)
17379 float32x2_t __b = {0.0f, 0.0f};
17380 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
17383 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17384 vclez_f64 (float64x1_t __a)
17386 return __a <= 0.0 ? -1ll : 0ll;
17389 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17390 vclez_p8 (poly8x8_t __a)
17392 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17393 return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
17394 (int8x8_t) __b);
17397 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17398 vclez_s8 (int8x8_t __a)
17400 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17401 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
17404 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17405 vclez_s16 (int16x4_t __a)
17407 int16x4_t __b = {0, 0, 0, 0};
17408 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
17411 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17412 vclez_s32 (int32x2_t __a)
17414 int32x2_t __b = {0, 0};
17415 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
17418 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17419 vclez_s64 (int64x1_t __a)
17421 return __a <= 0ll ? -1ll : 0ll;
17424 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17425 vclez_u64 (uint64x1_t __a)
17427 return __a <= 0ll ? -1ll : 0ll;
17430 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17431 vclezq_f32 (float32x4_t __a)
17433 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17434 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
17437 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17438 vclezq_f64 (float64x2_t __a)
17440 float64x2_t __b = {0.0, 0.0};
17441 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
17444 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17445 vclezq_p8 (poly8x16_t __a)
17447 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17448 0, 0, 0, 0, 0, 0, 0, 0};
17449 return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
17450 (int8x16_t) __b);
17453 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17454 vclezq_s8 (int8x16_t __a)
17456 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17457 0, 0, 0, 0, 0, 0, 0, 0};
17458 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
17461 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17462 vclezq_s16 (int16x8_t __a)
17464 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17465 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
17468 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17469 vclezq_s32 (int32x4_t __a)
17471 int32x4_t __b = {0, 0, 0, 0};
17472 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
17475 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17476 vclezq_s64 (int64x2_t __a)
17478 int64x2_t __b = {0, 0};
17479 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
17482 /* vclez - scalar. */
17484 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17485 vclezs_f32 (float32_t __a)
17487 return __a <= 0.0f ? -1 : 0;
17490 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17491 vclezd_s64 (int64x1_t __a)
17493 return __a <= 0 ? -1ll : 0ll;
17496 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17497 vclezd_u64 (int64x1_t __a)
17499 return __a <= 0 ? -1ll : 0ll;
17502 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17503 vclezd_f64 (float64_t __a)
17505 return __a <= 0.0 ? -1ll : 0ll;
17508 /* vclt - vector. */
17510 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17511 vclt_f32 (float32x2_t __a, float32x2_t __b)
17513 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
17516 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17517 vclt_f64 (float64x1_t __a, float64x1_t __b)
17519 return __a < __b ? -1ll : 0ll;
17522 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17523 vclt_p8 (poly8x8_t __a, poly8x8_t __b)
17525 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
17526 (int8x8_t) __a);
17529 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17530 vclt_s8 (int8x8_t __a, int8x8_t __b)
17532 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
17535 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17536 vclt_s16 (int16x4_t __a, int16x4_t __b)
17538 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
17541 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17542 vclt_s32 (int32x2_t __a, int32x2_t __b)
17544 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
17547 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17548 vclt_s64 (int64x1_t __a, int64x1_t __b)
17550 return __a < __b ? -1ll : 0ll;
17553 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17554 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
17556 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
17557 (int8x8_t) __a);
17560 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17561 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
17563 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
17564 (int16x4_t) __a);
17567 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17568 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
17570 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
17571 (int32x2_t) __a);
17574 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17575 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
17577 return __a < __b ? -1ll : 0ll;
17580 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17581 vcltq_f32 (float32x4_t __a, float32x4_t __b)
17583 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
17586 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17587 vcltq_f64 (float64x2_t __a, float64x2_t __b)
17589 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
17592 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17593 vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
17595 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
17596 (int8x16_t) __a);
17599 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17600 vcltq_s8 (int8x16_t __a, int8x16_t __b)
17602 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
17605 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17606 vcltq_s16 (int16x8_t __a, int16x8_t __b)
17608 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
17611 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17612 vcltq_s32 (int32x4_t __a, int32x4_t __b)
17614 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
17617 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17618 vcltq_s64 (int64x2_t __a, int64x2_t __b)
17620 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
17623 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17624 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
17626 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
17627 (int8x16_t) __a);
17630 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17631 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
17633 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
17634 (int16x8_t) __a);
17637 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17638 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
17640 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
17641 (int32x4_t) __a);
17644 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17645 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
17647 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
17648 (int64x2_t) __a);
17651 /* vclt - scalar. */
17653 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17654 vclts_f32 (float32_t __a, float32_t __b)
17656 return __a < __b ? -1 : 0;
17659 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17660 vcltd_s64 (int64x1_t __a, int64x1_t __b)
17662 return __a < __b ? -1ll : 0ll;
17665 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17666 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
17668 return __a < __b ? -1ll : 0ll;
17671 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17672 vcltd_f64 (float64_t __a, float64_t __b)
17674 return __a < __b ? -1ll : 0ll;
17677 /* vcltz - vector. */
17679 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17680 vcltz_f32 (float32x2_t __a)
17682 float32x2_t __b = {0.0f, 0.0f};
17683 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
17686 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17687 vcltz_f64 (float64x1_t __a)
17689 return __a < 0.0 ? -1ll : 0ll;
17692 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17693 vcltz_p8 (poly8x8_t __a)
17695 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17696 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
17697 (int8x8_t) __b);
17700 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17701 vcltz_s8 (int8x8_t __a)
17703 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17704 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
17707 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17708 vcltz_s16 (int16x4_t __a)
17710 int16x4_t __b = {0, 0, 0, 0};
17711 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
17714 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17715 vcltz_s32 (int32x2_t __a)
17717 int32x2_t __b = {0, 0};
17718 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
17721 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17722 vcltz_s64 (int64x1_t __a)
17724 return __a < 0ll ? -1ll : 0ll;
17727 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17728 vcltzq_f32 (float32x4_t __a)
17730 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17731 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
17734 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17735 vcltzq_f64 (float64x2_t __a)
17737 float64x2_t __b = {0.0, 0.0};
17738 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
17741 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17742 vcltzq_p8 (poly8x16_t __a)
17744 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17745 0, 0, 0, 0, 0, 0, 0, 0};
17746 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
17747 (int8x16_t) __b);
17750 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17751 vcltzq_s8 (int8x16_t __a)
17753 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17754 0, 0, 0, 0, 0, 0, 0, 0};
17755 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
17758 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17759 vcltzq_s16 (int16x8_t __a)
17761 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17762 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
17765 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17766 vcltzq_s32 (int32x4_t __a)
17768 int32x4_t __b = {0, 0, 0, 0};
17769 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
17772 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17773 vcltzq_s64 (int64x2_t __a)
17775 int64x2_t __b = {0, 0};
17776 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
17779 /* vcltz - scalar. */
17781 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17782 vcltzs_f32 (float32_t __a)
17784 return __a < 0.0f ? -1 : 0;
17787 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17788 vcltzd_s64 (int64x1_t __a)
17790 return __a < 0 ? -1ll : 0ll;
17793 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17794 vcltzd_u64 (int64x1_t __a)
17796 return __a < 0 ? -1ll : 0ll;
17799 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17800 vcltzd_f64 (float64_t __a)
17802 return __a < 0.0 ? -1ll : 0ll;
17805 /* vclz. */
17807 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17808 vclz_s8 (int8x8_t __a)
17810 return __builtin_aarch64_clzv8qi (__a);
17813 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17814 vclz_s16 (int16x4_t __a)
17816 return __builtin_aarch64_clzv4hi (__a);
17819 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17820 vclz_s32 (int32x2_t __a)
17822 return __builtin_aarch64_clzv2si (__a);
17825 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17826 vclz_u8 (uint8x8_t __a)
17828 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
17831 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17832 vclz_u16 (uint16x4_t __a)
17834 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
17837 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17838 vclz_u32 (uint32x2_t __a)
17840 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
17843 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17844 vclzq_s8 (int8x16_t __a)
17846 return __builtin_aarch64_clzv16qi (__a);
17849 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17850 vclzq_s16 (int16x8_t __a)
17852 return __builtin_aarch64_clzv8hi (__a);
17855 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17856 vclzq_s32 (int32x4_t __a)
17858 return __builtin_aarch64_clzv4si (__a);
17861 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17862 vclzq_u8 (uint8x16_t __a)
17864 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
17867 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17868 vclzq_u16 (uint16x8_t __a)
17870 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
17873 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17874 vclzq_u32 (uint32x4_t __a)
17876 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
17879 /* vcvt (double -> float). */
17881 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17882 vcvt_f32_f64 (float64x2_t __a)
17884 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
17887 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17888 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
17890 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
17893 /* vcvt (float -> double). */
17895 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17896 vcvt_f64_f32 (float32x2_t __a)
17899 return __builtin_aarch64_float_extend_lo_v2df (__a);
17902 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17903 vcvt_high_f64_f32 (float32x4_t __a)
17905 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
17908 /* vcvt (<u>int -> float) */
17910 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17911 vcvtd_f64_s64 (int64_t __a)
17913 return (float64_t) __a;
17916 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17917 vcvtd_f64_u64 (uint64_t __a)
17919 return (float64_t) __a;
17922 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17923 vcvts_f32_s32 (int32_t __a)
17925 return (float32_t) __a;
17928 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17929 vcvts_f32_u32 (uint32_t __a)
17931 return (float32_t) __a;
17934 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17935 vcvt_f32_s32 (int32x2_t __a)
17937 return __builtin_aarch64_floatv2siv2sf (__a);
17940 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17941 vcvt_f32_u32 (uint32x2_t __a)
17943 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
17946 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17947 vcvtq_f32_s32 (int32x4_t __a)
17949 return __builtin_aarch64_floatv4siv4sf (__a);
17952 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17953 vcvtq_f32_u32 (uint32x4_t __a)
17955 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
17958 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17959 vcvtq_f64_s64 (int64x2_t __a)
17961 return __builtin_aarch64_floatv2div2df (__a);
17964 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17965 vcvtq_f64_u64 (uint64x2_t __a)
17967 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
17970 /* vcvt (float -> <u>int) */
17972 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17973 vcvtd_s64_f64 (float64_t __a)
17975 return (int64_t) __a;
17978 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17979 vcvtd_u64_f64 (float64_t __a)
17981 return (uint64_t) __a;
17984 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17985 vcvts_s32_f32 (float32_t __a)
17987 return (int32_t) __a;
17990 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17991 vcvts_u32_f32 (float32_t __a)
17993 return (uint32_t) __a;
17996 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17997 vcvt_s32_f32 (float32x2_t __a)
17999 return __builtin_aarch64_lbtruncv2sfv2si (__a);
18002 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18003 vcvt_u32_f32 (float32x2_t __a)
18005 /* TODO: This cast should go away when builtins have
18006 their correct types. */
18007 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
18010 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18011 vcvtq_s32_f32 (float32x4_t __a)
18013 return __builtin_aarch64_lbtruncv4sfv4si (__a);
18016 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18017 vcvtq_u32_f32 (float32x4_t __a)
18019 /* TODO: This cast should go away when builtins have
18020 their correct types. */
18021 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
18024 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18025 vcvtq_s64_f64 (float64x2_t __a)
18027 return __builtin_aarch64_lbtruncv2dfv2di (__a);
18030 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18031 vcvtq_u64_f64 (float64x2_t __a)
18033 /* TODO: This cast should go away when builtins have
18034 their correct types. */
18035 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
18038 /* vcvta */
18040 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18041 vcvtad_s64_f64 (float64_t __a)
18043 return __builtin_aarch64_lrounddfdi (__a);
18046 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18047 vcvtad_u64_f64 (float64_t __a)
18049 return __builtin_aarch64_lroundudfdi (__a);
18052 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18053 vcvtas_s32_f32 (float32_t __a)
18055 return __builtin_aarch64_lroundsfsi (__a);
18058 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18059 vcvtas_u32_f32 (float32_t __a)
18061 return __builtin_aarch64_lroundusfsi (__a);
18064 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18065 vcvta_s32_f32 (float32x2_t __a)
18067 return __builtin_aarch64_lroundv2sfv2si (__a);
18070 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18071 vcvta_u32_f32 (float32x2_t __a)
18073 /* TODO: This cast should go away when builtins have
18074 their correct types. */
18075 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
18078 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18079 vcvtaq_s32_f32 (float32x4_t __a)
18081 return __builtin_aarch64_lroundv4sfv4si (__a);
18084 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18085 vcvtaq_u32_f32 (float32x4_t __a)
18087 /* TODO: This cast should go away when builtins have
18088 their correct types. */
18089 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
18092 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18093 vcvtaq_s64_f64 (float64x2_t __a)
18095 return __builtin_aarch64_lroundv2dfv2di (__a);
18098 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18099 vcvtaq_u64_f64 (float64x2_t __a)
18101 /* TODO: This cast should go away when builtins have
18102 their correct types. */
18103 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
18106 /* vcvtm */
18108 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18109 vcvtmd_s64_f64 (float64_t __a)
18111 return __builtin_lfloor (__a);
18114 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18115 vcvtmd_u64_f64 (float64_t __a)
18117 return __builtin_aarch64_lfloorudfdi (__a);
18120 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18121 vcvtms_s32_f32 (float32_t __a)
18123 return __builtin_ifloorf (__a);
18126 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18127 vcvtms_u32_f32 (float32_t __a)
18129 return __builtin_aarch64_lfloorusfsi (__a);
18132 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18133 vcvtm_s32_f32 (float32x2_t __a)
18135 return __builtin_aarch64_lfloorv2sfv2si (__a);
18138 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18139 vcvtm_u32_f32 (float32x2_t __a)
18141 /* TODO: This cast should go away when builtins have
18142 their correct types. */
18143 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
18146 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18147 vcvtmq_s32_f32 (float32x4_t __a)
18149 return __builtin_aarch64_lfloorv4sfv4si (__a);
18152 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18153 vcvtmq_u32_f32 (float32x4_t __a)
18155 /* TODO: This cast should go away when builtins have
18156 their correct types. */
18157 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
18160 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18161 vcvtmq_s64_f64 (float64x2_t __a)
18163 return __builtin_aarch64_lfloorv2dfv2di (__a);
18166 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18167 vcvtmq_u64_f64 (float64x2_t __a)
18169 /* TODO: This cast should go away when builtins have
18170 their correct types. */
18171 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
18174 /* vcvtn */
18176 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18177 vcvtnd_s64_f64 (float64_t __a)
18179 return __builtin_aarch64_lfrintndfdi (__a);
18182 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18183 vcvtnd_u64_f64 (float64_t __a)
18185 return __builtin_aarch64_lfrintnudfdi (__a);
18188 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18189 vcvtns_s32_f32 (float32_t __a)
18191 return __builtin_aarch64_lfrintnsfsi (__a);
18194 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18195 vcvtns_u32_f32 (float32_t __a)
18197 return __builtin_aarch64_lfrintnusfsi (__a);
18200 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18201 vcvtn_s32_f32 (float32x2_t __a)
18203 return __builtin_aarch64_lfrintnv2sfv2si (__a);
18206 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18207 vcvtn_u32_f32 (float32x2_t __a)
18209 /* TODO: This cast should go away when builtins have
18210 their correct types. */
18211 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
18214 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18215 vcvtnq_s32_f32 (float32x4_t __a)
18217 return __builtin_aarch64_lfrintnv4sfv4si (__a);
18220 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18221 vcvtnq_u32_f32 (float32x4_t __a)
18223 /* TODO: This cast should go away when builtins have
18224 their correct types. */
18225 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
18228 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18229 vcvtnq_s64_f64 (float64x2_t __a)
18231 return __builtin_aarch64_lfrintnv2dfv2di (__a);
18234 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18235 vcvtnq_u64_f64 (float64x2_t __a)
18237 /* TODO: This cast should go away when builtins have
18238 their correct types. */
18239 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
18242 /* vcvtp */
18244 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18245 vcvtpd_s64_f64 (float64_t __a)
18247 return __builtin_lceil (__a);
18250 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18251 vcvtpd_u64_f64 (float64_t __a)
18253 return __builtin_aarch64_lceiludfdi (__a);
18256 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18257 vcvtps_s32_f32 (float32_t __a)
18259 return __builtin_iceilf (__a);
18262 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18263 vcvtps_u32_f32 (float32_t __a)
18265 return __builtin_aarch64_lceilusfsi (__a);
18268 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18269 vcvtp_s32_f32 (float32x2_t __a)
18271 return __builtin_aarch64_lceilv2sfv2si (__a);
18274 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18275 vcvtp_u32_f32 (float32x2_t __a)
18277 /* TODO: This cast should go away when builtins have
18278 their correct types. */
18279 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
18282 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18283 vcvtpq_s32_f32 (float32x4_t __a)
18285 return __builtin_aarch64_lceilv4sfv4si (__a);
18288 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18289 vcvtpq_u32_f32 (float32x4_t __a)
18291 /* TODO: This cast should go away when builtins have
18292 their correct types. */
18293 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
18296 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18297 vcvtpq_s64_f64 (float64x2_t __a)
18299 return __builtin_aarch64_lceilv2dfv2di (__a);
18302 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18303 vcvtpq_u64_f64 (float64x2_t __a)
18305 /* TODO: This cast should go away when builtins have
18306 their correct types. */
18307 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
18310 /* vdup_n */
18312 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18313 vdup_n_f32 (float32_t __a)
18315 return (float32x2_t) {__a, __a};
18318 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18319 vdup_n_f64 (float64_t __a)
18321 return __a;
18324 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18325 vdup_n_p8 (poly8_t __a)
18327 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18330 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18331 vdup_n_p16 (poly16_t __a)
18333 return (poly16x4_t) {__a, __a, __a, __a};
18336 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18337 vdup_n_s8 (int8_t __a)
18339 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18342 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18343 vdup_n_s16 (int16_t __a)
18345 return (int16x4_t) {__a, __a, __a, __a};
18348 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18349 vdup_n_s32 (int32_t __a)
18351 return (int32x2_t) {__a, __a};
18354 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18355 vdup_n_s64 (int64_t __a)
18357 return __a;
18360 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18361 vdup_n_u8 (uint8_t __a)
18363 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18366 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18367 vdup_n_u16 (uint16_t __a)
18369 return (uint16x4_t) {__a, __a, __a, __a};
18372 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18373 vdup_n_u32 (uint32_t __a)
18375 return (uint32x2_t) {__a, __a};
18378 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18379 vdup_n_u64 (uint64_t __a)
18381 return __a;
18384 /* vdupq_n */
18386 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18387 vdupq_n_f32 (float32_t __a)
18389 return (float32x4_t) {__a, __a, __a, __a};
18392 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18393 vdupq_n_f64 (float64_t __a)
18395 return (float64x2_t) {__a, __a};
18398 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18399 vdupq_n_p8 (uint32_t __a)
18401 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18402 __a, __a, __a, __a, __a, __a, __a, __a};
18405 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18406 vdupq_n_p16 (uint32_t __a)
18408 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18411 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18412 vdupq_n_s8 (int32_t __a)
18414 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18415 __a, __a, __a, __a, __a, __a, __a, __a};
18418 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18419 vdupq_n_s16 (int32_t __a)
18421 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18424 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18425 vdupq_n_s32 (int32_t __a)
18427 return (int32x4_t) {__a, __a, __a, __a};
18430 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18431 vdupq_n_s64 (int64_t __a)
18433 return (int64x2_t) {__a, __a};
18436 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18437 vdupq_n_u8 (uint32_t __a)
18439 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18440 __a, __a, __a, __a, __a, __a, __a, __a};
18443 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18444 vdupq_n_u16 (uint32_t __a)
18446 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18450 vdupq_n_u32 (uint32_t __a)
18452 return (uint32x4_t) {__a, __a, __a, __a};
18455 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18456 vdupq_n_u64 (uint64_t __a)
18458 return (uint64x2_t) {__a, __a};
18461 /* vdup_lane */
18463 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18464 vdup_lane_f32 (float32x2_t __a, const int __b)
18466 return __aarch64_vdup_lane_f32 (__a, __b);
18469 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18470 vdup_lane_f64 (float64x1_t __a, const int __b)
18472 return __aarch64_vdup_lane_f64 (__a, __b);
18475 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18476 vdup_lane_p8 (poly8x8_t __a, const int __b)
18478 return __aarch64_vdup_lane_p8 (__a, __b);
18481 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18482 vdup_lane_p16 (poly16x4_t __a, const int __b)
18484 return __aarch64_vdup_lane_p16 (__a, __b);
18487 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18488 vdup_lane_s8 (int8x8_t __a, const int __b)
18490 return __aarch64_vdup_lane_s8 (__a, __b);
18493 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18494 vdup_lane_s16 (int16x4_t __a, const int __b)
18496 return __aarch64_vdup_lane_s16 (__a, __b);
18499 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18500 vdup_lane_s32 (int32x2_t __a, const int __b)
18502 return __aarch64_vdup_lane_s32 (__a, __b);
18505 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18506 vdup_lane_s64 (int64x1_t __a, const int __b)
18508 return __aarch64_vdup_lane_s64 (__a, __b);
18511 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18512 vdup_lane_u8 (uint8x8_t __a, const int __b)
18514 return __aarch64_vdup_lane_u8 (__a, __b);
18517 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18518 vdup_lane_u16 (uint16x4_t __a, const int __b)
18520 return __aarch64_vdup_lane_u16 (__a, __b);
18523 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18524 vdup_lane_u32 (uint32x2_t __a, const int __b)
18526 return __aarch64_vdup_lane_u32 (__a, __b);
18529 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18530 vdup_lane_u64 (uint64x1_t __a, const int __b)
18532 return __aarch64_vdup_lane_u64 (__a, __b);
18535 /* vdup_laneq */
18537 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18538 vdup_laneq_f32 (float32x4_t __a, const int __b)
18540 return __aarch64_vdup_laneq_f32 (__a, __b);
18543 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18544 vdup_laneq_f64 (float64x2_t __a, const int __b)
18546 return __aarch64_vdup_laneq_f64 (__a, __b);
18549 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18550 vdup_laneq_p8 (poly8x16_t __a, const int __b)
18552 return __aarch64_vdup_laneq_p8 (__a, __b);
18555 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18556 vdup_laneq_p16 (poly16x8_t __a, const int __b)
18558 return __aarch64_vdup_laneq_p16 (__a, __b);
18561 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18562 vdup_laneq_s8 (int8x16_t __a, const int __b)
18564 return __aarch64_vdup_laneq_s8 (__a, __b);
18567 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18568 vdup_laneq_s16 (int16x8_t __a, const int __b)
18570 return __aarch64_vdup_laneq_s16 (__a, __b);
18573 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18574 vdup_laneq_s32 (int32x4_t __a, const int __b)
18576 return __aarch64_vdup_laneq_s32 (__a, __b);
18579 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18580 vdup_laneq_s64 (int64x2_t __a, const int __b)
18582 return __aarch64_vdup_laneq_s64 (__a, __b);
18585 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18586 vdup_laneq_u8 (uint8x16_t __a, const int __b)
18588 return __aarch64_vdup_laneq_u8 (__a, __b);
18591 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18592 vdup_laneq_u16 (uint16x8_t __a, const int __b)
18594 return __aarch64_vdup_laneq_u16 (__a, __b);
18597 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18598 vdup_laneq_u32 (uint32x4_t __a, const int __b)
18600 return __aarch64_vdup_laneq_u32 (__a, __b);
18603 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18604 vdup_laneq_u64 (uint64x2_t __a, const int __b)
18606 return __aarch64_vdup_laneq_u64 (__a, __b);
/* vdupq_lane: duplicate lane __b of a 64-bit source vector into every
   lane of a 128-bit ("q") result, via the __aarch64_vdupq_lane_<type>
   helper macros.  __b must be a constant lane index valid for the
   64-bit source vector.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f32 (__a, __b);
}

/* Note: float64x1_t is a scalar double in this header, so the only
   valid lane index is 0.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u64 (__a, __b);
}
/* vdupq_laneq: duplicate lane __b of a 128-bit source vector into every
   lane of a 128-bit result, via the __aarch64_vdupq_laneq_<type> helper
   macros.  __b must be a constant lane index valid for the 128-bit
   source vector.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u64 (__a, __b);
}
/* vdupb_lane: extract the 8-bit element at lane __b of a 64-bit vector
   as a scalar.  Implemented as a plain lane read via the
   __aarch64_vget_lane_<type> helper macros.  */
__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_p8 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vdupb_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_s8 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vdupb_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_u8 (__a, __b);
}

/* vduph_lane: extract the 16-bit element at lane __b of a 64-bit vector
   as a scalar.  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_p16 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_s16 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_u16 (__a, __b);
}
/* vdups_lane: extract the 32-bit element at lane __b of a 64-bit vector
   as a scalar, via the __aarch64_vget_lane_<type> helper macros.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_f32 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_s32 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_u32 (__a, __b);
}
/* vdupd_lane: extract the 64-bit element of a one-element vector as a
   scalar.  The xx1 types are plain scalars in this header, so the only
   valid lane index is 0 and __b is deliberately unused — the value is
   returned as-is.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}
/* vdupb_laneq: extract the 8-bit element at lane __b of a 128-bit
   vector as a scalar, via the __aarch64_vgetq_lane_<type> helpers.  */
__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p8 (__a, __b);
}
18838 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18839 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
18841 return __aarch64_vgetq_lane_s8 (__a, __b);
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vdupb_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u8 (__a, __b);
}

/* vduph_laneq: extract the 16-bit element at lane __b of a 128-bit
   vector as a scalar.  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p16 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s16 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u16 (__a, __b);
}

/* vdups_laneq: extract the 32-bit element at lane __b of a 128-bit
   vector as a scalar.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f32 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s32 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u32 (__a, __b);
}

/* vdupd_laneq: extract the 64-bit element at lane __b of a 128-bit
   vector as a scalar.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f64 (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s64 (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u64 (__a, __b);
}
/* vfma_lane: fused multiply-accumulate with a by-lane operand,
   computing __a + __b * __c[__lane] per element.  Note the builtin
   argument order: the fmav* builtins take (multiplicand, multiplier,
   addend), so the accumulator __a goes last.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (__b,
				    __aarch64_vdup_lane_f32 (__c, __lane),
				    __a);
}

/* Scalar f64 variant: float64x1_t/float64_t has a single element, so
   the only valid __lane is 0 and the index is not consulted.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfma_lane_f64 (float64_t __a, float64_t __b,
	       float64_t __c, const int __lane)
{
  return __builtin_fma (__b, __c, __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmad_lane_f64 (float64_t __a, float64_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_fma (__b, __c, __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmas_lane_f32 (float32_t __a, float32_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
}
/* vfma_laneq: fused multiply-accumulate with the multiplier taken from
   lane __lane of a 128-bit vector __c; computes __a + __b * __c[__lane]
   per element.  Accumulator __a is the last builtin argument.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
	        float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (__b,
				    __aarch64_vdup_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfma_laneq_f64 (float64_t __a, float64_t __b,
	        float64x2_t __c, const int __lane)
{
  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmad_laneq_f64 (float64_t __a, float64_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmas_laneq_f32 (float32_t __a, float32_t __b,
		 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
}
/* vfmaq_lane: 128-bit fused multiply-accumulate with a by-lane
   multiplier from a 64-bit vector (or scalar for f64).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
				    __aarch64_vdupq_lane_f32 (__c, __lane),
				    __a);
}

/* __c is a scalar here, so __lane must be 0 and the scalar is simply
   broadcast with vdupq_n_f64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
}
/* vfmaq_laneq: 128-bit fused multiply-accumulate with the multiplier
   taken from lane __lane of a 128-bit vector __c.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
	         float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
				    __aarch64_vdupq_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (__b,
				    __aarch64_vdupq_laneq_f64 (__c, __lane),
				    __a);
}
/* vfms_lane: fused multiply-subtract with a by-lane multiplier,
   computing __a - __b * __c[__lane] per element.  Implemented by
   negating __b and reusing the fused multiply-add builtin.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
				    __aarch64_vdup_lane_f32 (__c, __lane),
				    __a);
}

/* Scalar f64 variant: only __lane == 0 is valid, so the index is not
   consulted.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfms_lane_f64 (float64_t __a, float64_t __b,
	       float64_t __c, const int __lane)
{
  return __builtin_fma (-__b, __c, __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_lane_f64 (float64_t __a, float64_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_fma (-__b, __c, __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_lane_f32 (float32_t __a, float32_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
}
/* vfms_laneq: fused multiply-subtract with the multiplier taken from
   lane __lane of a 128-bit vector __c; computes __a - __b * __c[__lane]
   per element via negation of __b.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
	        float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
				    __aarch64_vdup_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfms_laneq_f64 (float64_t __a, float64_t __b,
	        float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_laneq_f64 (float64_t __a, float64_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_laneq_f32 (float32_t __a, float32_t __b,
		 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
}
/* vfmsq_lane: 128-bit fused multiply-subtract with a by-lane
   multiplier from a 64-bit vector (or scalar for f64).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
				    __aarch64_vdupq_lane_f32 (__c, __lane),
				    __a);
}

/* __c is a scalar here, so __lane must be 0 and the scalar is simply
   broadcast with vdupq_n_f64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
}
/* vfmsq_laneq: 128-bit fused multiply-subtract with the multiplier
   taken from lane __lane of a 128-bit vector __c.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
	         float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
				    __aarch64_vdupq_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b,
				    __aarch64_vdupq_laneq_f64 (__c, __lane),
				    __a);
}
/* vld1: load one 64-bit vector from memory at `a`.  Each wrapper casts
   the element pointer to the builtin's expected pointer type and, where
   the builtin returns a different vector type (poly/unsigned), casts
   the result.  One-element (xx1) types are plain scalars in this
   header, so those variants are a direct dereference.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64 (const float64_t *a)
{
  return *a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8 (const poly8_t *a)
{
  return (poly8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t *a)
{
  return (poly16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t *a)
{
  return *a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t *a)
{
  return (uint8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t *a)
{
  return (uint16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t *a)
{
  return (uint32x2_t)
    __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t *a)
{
  return *a;
}
/* vld1q: load one 128-bit vector from memory at `a`, mirroring the
   vld1 wrappers above with the q-register builtins.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t *a)
{
  return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t *a)
{
  return (poly8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16 (const poly16_t *a)
{
  return (poly16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t *a)
{
  return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t *a)
{
  return (uint8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t *a)
{
  return (uint16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t *a)
{
  return (uint32x4_t)
    __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t *a)
{
  return (uint64x2_t)
    __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}
/* vldn: structured (de-interleaving) loads.  Every vld2/vld2q wrapper
   follows the same pattern: the ld2* builtin loads two registers'
   worth of interleaved data into an opaque __builtin_aarch64_simd_oi
   pair, and the get_[dq]regoi* accessors extract register 0 and 1 into
   the two fields of the returned xx2_t struct, casting where the
   accessor's vector type differs from the public one.  */

__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
  return ret;
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_p16 (const poly16_t * __a)
{
  poly16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vld2_s32 (const int32_t * __a)
{
  int32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vld2_u8 (const uint8_t * __a)
{
  uint8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vld2_u16 (const uint16_t * __a)
{
  uint16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vld2_u32 (const uint32_t * __a)
{
  uint32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
  float32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
  return ret;
}

/* vld2q: 128-bit (q-register) variants of the above.  */

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vld2q_s8 (const int8_t * __a)
{
  int8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vld2q_p8 (const poly8_t * __a)
{
  poly8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vld2q_s16 (const int16_t * __a)
{
  int16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vld2q_p16 (const poly16_t * __a)
{
  poly16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_s32 (const int32_t * __a)
{
  int32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
vld2q_s64 (const int64_t * __a)
{
  int64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vld2q_u8 (const uint8_t * __a)
{
  uint8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vld2q_u16 (const uint16_t * __a)
{
  uint16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vld2q_u32 (const uint32_t * __a)
{
  uint32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
vld2q_u64 (const uint64_t * __a)
{
  uint64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
  float32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
  return ret;
}

__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
vld2q_f64 (const float64_t * __a)
{
  float64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
  return ret;
}
/* vld3: three-register de-interleaving loads.  Same shape as vld2, but
   the ld3* builtins fill an opaque __builtin_aarch64_simd_ci triple and
   the get_dregci* accessors extract registers 0..2 into the xx3_t
   result.  */

__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_s64 (const int64_t * __a)
{
  int64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_u64 (const uint64_t * __a)
{
  uint64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
vld3_f64 (const float64_t * __a)
{
  float64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
  return ret;
}

__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
vld3_s8 (const int8_t * __a)
{
  int8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
vld3_p8 (const poly8_t * __a)
{
  poly8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
vld3_s16 (const int16_t * __a)
{
  int16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}
19608 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
19609 vld3_p16 (const poly16_t * __a)
19611 poly16x4x3_t ret;
19612 __builtin_aarch64_simd_ci __o;
19613 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19614 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19615 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19616 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19617 return ret;
19620 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
19621 vld3_s32 (const int32_t * __a)
19623 int32x2x3_t ret;
19624 __builtin_aarch64_simd_ci __o;
19625 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19626 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19627 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19628 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19629 return ret;
19632 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
19633 vld3_u8 (const uint8_t * __a)
19635 uint8x8x3_t ret;
19636 __builtin_aarch64_simd_ci __o;
19637 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19638 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19639 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19640 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19641 return ret;
19644 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
19645 vld3_u16 (const uint16_t * __a)
19647 uint16x4x3_t ret;
19648 __builtin_aarch64_simd_ci __o;
19649 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19650 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19651 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19652 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19653 return ret;
19656 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
19657 vld3_u32 (const uint32_t * __a)
19659 uint32x2x3_t ret;
19660 __builtin_aarch64_simd_ci __o;
19661 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19662 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19663 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19664 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19665 return ret;
19668 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
19669 vld3_f32 (const float32_t * __a)
19671 float32x2x3_t ret;
19672 __builtin_aarch64_simd_ci __o;
19673 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
19674 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
19675 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
19676 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
19677 return ret;
/* vld3q family: 128-bit (Q-register) counterparts of vld3.  Same
   pattern as the D-register variants, but the tuple elements are
   extracted with get_qregci* builtins.  */

__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_s8 (const int8_t * __a)
{
  int8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
vld3q_p8 (const poly8_t * __a)
{
  poly8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
vld3q_s16 (const int16_t * __a)
{
  int16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
vld3q_p16 (const poly16_t * __a)
{
  poly16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
vld3q_s32 (const int32_t * __a)
{
  int32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
vld3q_s64 (const int64_t * __a)
{
  int64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
vld3q_u8 (const uint8_t * __a)
{
  uint8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
vld3q_u16 (const uint16_t * __a)
{
  uint16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
vld3q_u32 (const uint32_t * __a)
{
  uint32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
vld3q_u64 (const uint64_t * __a)
{
  uint64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
  float32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
  return ret;
}

__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
vld3q_f64 (const float64_t * __a)
{
  float64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
  return ret;
}
/* vld4 family: load a four-element interleaved structure from __a into
   four 64-bit D-register vectors.  The ld4* builtin yields an opaque
   XI-mode four-register tuple __o; each vector is extracted with a
   get_dregxi* builtin at indices 0..3 and cast to the public type.  */

__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64 (const int64_t * __a)
{
  int64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
vld4_u64 (const uint64_t * __a)
{
  uint64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
vld4_f64 (const float64_t * __a)
{
  float64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
  ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
  return ret;
}

__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
vld4_s8 (const int8_t * __a)
{
  int8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
vld4_p8 (const poly8_t * __a)
{
  poly8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_s16 (const int16_t * __a)
{
  int16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_p16 (const poly16_t * __a)
{
  poly16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_s32 (const int32_t * __a)
{
  int32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_u8 (const uint8_t * __a)
{
  uint8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
vld4_u16 (const uint16_t * __a)
{
  uint16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
vld4_u32 (const uint32_t * __a)
{
  uint32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
  float32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
  return ret;
}
/* vld4q family: 128-bit (Q-register) counterparts of vld4.  Same
   pattern as the D-register variants, but the XI-mode tuple elements
   are extracted with get_qregxi* builtins.  */

__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_s8 (const int8_t * __a)
{
  int8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
vld4q_p8 (const poly8_t * __a)
{
  poly8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
vld4q_s16 (const int16_t * __a)
{
  int16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
vld4q_p16 (const poly16_t * __a)
{
  poly16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
vld4q_s32 (const int32_t * __a)
{
  int32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
vld4q_s64 (const int64_t * __a)
{
  int64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
vld4q_u8 (const uint8_t * __a)
{
  uint8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
vld4q_u16 (const uint16_t * __a)
{
  uint16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
vld4q_u32 (const uint32_t * __a)
{
  uint32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
vld4q_u64 (const uint64_t * __a)
{
  uint64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
  float32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
  return ret;
}

__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
vld4q_f64 (const float64_t * __a)
{
  float64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
  return ret;
}
/* vmax: element-wise maximum.  Float variants use the smax_nan builtins
   (the NaN-propagating FMAX form, as opposed to the smax builtins used
   by vmaxnm below).  Unsigned variants reuse the umax builtins, which
   are declared on signed vector modes, via bit-reinterpreting casts.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmax_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smax_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmax_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_smaxv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmax_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_smaxv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmax_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_smaxv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmax_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmax_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmax_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smax_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smax_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmaxq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_smaxv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmaxq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_smaxv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmaxq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_smaxv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}
/* vmaxnm: element-wise maximum using the non-NaN-propagating smax
   builtins (FMAXNM semantics: a number is preferred over a quiet NaN),
   in contrast to the smax_nan builtins used by vmax above.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smaxv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smaxv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smaxv2df (__a, __b);
}
/* vmaxv: across-vector maximum reduction.  The reduc_* builtins return
   a vector whose lane 0 holds the reduced value; the scalar result is
   extracted with vget_lane/vgetq_lane at index 0.  Float variants use
   the NaN-propagating reduc_smax_nan builtins; unsigned variants cast
   through the signed modes the reduc_umax builtins are declared on.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t __a)
{
  return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t __a)
{
  return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t __a)
{
  return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxv_u8 (uint8x8_t __a)
{
  return vget_lane_u8 ((uint8x8_t)
		__builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxv_u16 (uint16x4_t __a)
{
  return vget_lane_u16 ((uint16x4_t)
		__builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxv_u32 (uint32x2_t __a)
{
  return vget_lane_u32 ((uint32x2_t)
		__builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t __a)
{
  return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t __a)
{
  return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t __a)
{
  return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxvq_u8 (uint8x16_t __a)
{
  return vgetq_lane_u8 ((uint8x16_t)
		__builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxvq_u16 (uint16x8_t __a)
{
  return vgetq_lane_u16 ((uint16x8_t)
		__builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxvq_u32 (uint32x4_t __a)
{
  return vgetq_lane_u32 ((uint32x4_t)
		__builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0);
}
/* vmaxnmv: across-vector maximum reduction using the non-NaN variant
   (reduc_smax, FMAXNMV semantics); scalar result taken from lane 0.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
}
/* vmin: element-wise minimum, mirroring the vmax family above.  Float
   variants use the NaN-propagating smin_nan builtins; unsigned
   variants cast through the signed modes the umin builtins use.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmin_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smin_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sminv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmin_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sminv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmin_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sminv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmin_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmin_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmin_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smin_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smin_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vminq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sminv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vminq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sminv8hi (__a, __b);
}
20443 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20444 vminq_s32 (int32x4_t __a, int32x4_t __b)
20446 return __builtin_aarch64_sminv4si (__a, __b);
20449 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20450 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
20452 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
20453 (int8x16_t) __b);
20456 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20457 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
20459 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
20460 (int16x8_t) __b);
20463 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20464 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
20466 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
20467 (int32x4_t) __b);
20470 /* vminnm */
20472 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20473 vminnm_f32 (float32x2_t __a, float32x2_t __b)
20475 return __builtin_aarch64_sminv2sf (__a, __b);
20478 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20479 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
20481 return __builtin_aarch64_sminv4sf (__a, __b);
20484 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20485 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
20487 return __builtin_aarch64_sminv2df (__a, __b);
20490 /* vminv */
20492 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20493 vminv_f32 (float32x2_t __a)
20495 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0);
20498 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20499 vminv_s8 (int8x8_t __a)
20501 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0);
20504 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20505 vminv_s16 (int16x4_t __a)
20507 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
20510 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20511 vminv_s32 (int32x2_t __a)
20513 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
20516 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20517 vminv_u8 (uint8x8_t __a)
20519 return vget_lane_u8 ((uint8x8_t)
20520 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0);
20523 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20524 vminv_u16 (uint16x4_t __a)
20526 return vget_lane_u16 ((uint16x4_t)
20527 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0);
20530 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20531 vminv_u32 (uint32x2_t __a)
20533 return vget_lane_u32 ((uint32x2_t)
20534 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0);
20537 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20538 vminvq_f32 (float32x4_t __a)
20540 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0);
20543 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20544 vminvq_f64 (float64x2_t __a)
20546 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0);
20549 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20550 vminvq_s8 (int8x16_t __a)
20552 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
20555 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20556 vminvq_s16 (int16x8_t __a)
20558 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
20561 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20562 vminvq_s32 (int32x4_t __a)
20564 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
20567 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20568 vminvq_u8 (uint8x16_t __a)
20570 return vgetq_lane_u8 ((uint8x16_t)
20571 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0);
20574 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20575 vminvq_u16 (uint16x8_t __a)
20577 return vgetq_lane_u16 ((uint16x8_t)
20578 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0);
20581 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20582 vminvq_u32 (uint32x4_t __a)
20584 return vgetq_lane_u32 ((uint32x4_t)
20585 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0);
20588 /* vminnmv */
20590 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20591 vminnmv_f32 (float32x2_t __a)
20593 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
20596 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20597 vminnmvq_f32 (float32x4_t __a)
20599 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
20602 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20603 vminnmvq_f64 (float64x2_t __a)
20605 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
20608 /* vmla */
20610 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20611 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20613 return a + b * c;
20616 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20617 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20619 return a + b * c;
20622 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20623 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20625 return a + b * c;
20628 /* vmla_lane */
20630 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20631 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
20632 float32x2_t __c, const int __lane)
20634 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20637 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20638 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
20639 int16x4_t __c, const int __lane)
20641 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20644 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20645 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
20646 int32x2_t __c, const int __lane)
20648 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20651 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20652 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20653 uint16x4_t __c, const int __lane)
20655 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20658 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20659 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20660 uint32x2_t __c, const int __lane)
20662 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20665 /* vmla_laneq */
20667 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20668 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
20669 float32x4_t __c, const int __lane)
20671 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20674 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20675 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
20676 int16x8_t __c, const int __lane)
20678 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20681 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20682 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
20683 int32x4_t __c, const int __lane)
20685 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20688 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20689 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20690 uint16x8_t __c, const int __lane)
20692 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20695 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20696 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20697 uint32x4_t __c, const int __lane)
20699 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20702 /* vmlaq_lane */
20704 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20705 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
20706 float32x2_t __c, const int __lane)
20708 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20711 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20712 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
20713 int16x4_t __c, const int __lane)
20715 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20718 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20719 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
20720 int32x2_t __c, const int __lane)
20722 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20725 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20726 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20727 uint16x4_t __c, const int __lane)
20729 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20732 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20733 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20734 uint32x2_t __c, const int __lane)
20736 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20739 /* vmlaq_laneq */
20741 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20742 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20743 float32x4_t __c, const int __lane)
20745 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20748 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20749 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20750 int16x8_t __c, const int __lane)
20752 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20755 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20756 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20757 int32x4_t __c, const int __lane)
20759 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20762 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20763 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20764 uint16x8_t __c, const int __lane)
20766 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20769 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20770 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20771 uint32x4_t __c, const int __lane)
20773 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20776 /* vmls */
20778 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20779 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20781 return a - b * c;
20784 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20785 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20787 return a - b * c;
20790 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20791 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20793 return a - b * c;
20796 /* vmls_lane */
20798 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20799 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
20800 float32x2_t __c, const int __lane)
20802 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20805 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20806 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
20807 int16x4_t __c, const int __lane)
20809 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20812 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20813 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
20814 int32x2_t __c, const int __lane)
20816 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20819 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20820 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20821 uint16x4_t __c, const int __lane)
20823 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20826 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20827 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20828 uint32x2_t __c, const int __lane)
20830 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20833 /* vmls_laneq */
20835 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20836 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
20837 float32x4_t __c, const int __lane)
20839 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20842 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20843 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
20844 int16x8_t __c, const int __lane)
20846 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20849 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20850 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
20851 int32x4_t __c, const int __lane)
20853 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20856 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20857 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20858 uint16x8_t __c, const int __lane)
20860 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20863 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20864 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20865 uint32x4_t __c, const int __lane)
20867 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20870 /* vmlsq_lane */
20872 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20873 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
20874 float32x2_t __c, const int __lane)
20876 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20879 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20880 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
20881 int16x4_t __c, const int __lane)
20883 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20886 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20887 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
20888 int32x2_t __c, const int __lane)
20890 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20893 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20894 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20895 uint16x4_t __c, const int __lane)
20897 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20900 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20901 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20902 uint32x2_t __c, const int __lane)
20904 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20907 /* vmlsq_laneq */
20909 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20910 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20911 float32x4_t __c, const int __lane)
20913 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20916 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20917 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20918 int16x8_t __c, const int __lane)
20920 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20923 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20924 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20925 int32x4_t __c, const int __lane)
20927 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20929 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20930 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20931 uint16x8_t __c, const int __lane)
20933 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20936 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20937 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20938 uint32x4_t __c, const int __lane)
20940 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20943 /* vmul_lane */
20945 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20946 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
20948 return __a * __aarch64_vget_lane_f32 (__b, __lane);
20951 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20952 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
20954 return __a * __b;
20957 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20958 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
20960 return __a * __aarch64_vget_lane_s16 (__b, __lane);
20963 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20964 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
20966 return __a * __aarch64_vget_lane_s32 (__b, __lane);
20969 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20970 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
20972 return __a * __aarch64_vget_lane_u16 (__b, __lane);
20975 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20976 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
20978 return __a * __aarch64_vget_lane_u32 (__b, __lane);
20981 /* vmul_laneq */
20983 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20984 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
20986 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
20989 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20990 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
20992 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
20995 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20996 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
20998 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
21001 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21002 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
21004 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
21007 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21008 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
21010 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
21013 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21014 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
21016 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
21019 /* vmulq_lane */
21021 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21022 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
21024 return __a * __aarch64_vget_lane_f32 (__b, __lane);
21027 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21028 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
21030 return __a * __b;
21033 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21034 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
21036 return __a * __aarch64_vget_lane_s16 (__b, __lane);
21039 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21040 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
21042 return __a * __aarch64_vget_lane_s32 (__b, __lane);
21045 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21046 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
21048 return __a * __aarch64_vget_lane_u16 (__b, __lane);
21051 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21052 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
21054 return __a * __aarch64_vget_lane_u32 (__b, __lane);
21057 /* vmulq_laneq */
21059 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21060 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
21062 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
21065 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21066 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
21068 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
21071 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21072 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
21074 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
21077 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21078 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
21080 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
21083 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21084 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
21086 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
21089 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21090 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
21092 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
21095 /* vneg */
21097 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21098 vneg_f32 (float32x2_t __a)
21100 return -__a;
21103 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21104 vneg_f64 (float64x1_t __a)
21106 return -__a;
21109 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21110 vneg_s8 (int8x8_t __a)
21112 return -__a;
21115 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21116 vneg_s16 (int16x4_t __a)
21118 return -__a;
21121 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21122 vneg_s32 (int32x2_t __a)
21124 return -__a;
21127 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21128 vneg_s64 (int64x1_t __a)
21130 return -__a;
21133 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21134 vnegq_f32 (float32x4_t __a)
21136 return -__a;
21139 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21140 vnegq_f64 (float64x2_t __a)
21142 return -__a;
21145 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21146 vnegq_s8 (int8x16_t __a)
21148 return -__a;
21151 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21152 vnegq_s16 (int16x8_t __a)
21154 return -__a;
21157 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21158 vnegq_s32 (int32x4_t __a)
21160 return -__a;
21163 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21164 vnegq_s64 (int64x2_t __a)
21166 return -__a;
21169 /* vqabs */
21171 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21172 vqabsq_s64 (int64x2_t __a)
21174 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
21177 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21178 vqabsb_s8 (int8x1_t __a)
21180 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
21183 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21184 vqabsh_s16 (int16x1_t __a)
21186 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
21189 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21190 vqabss_s32 (int32x1_t __a)
21192 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
21195 /* vqadd */
21197 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21198 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
21200 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
21203 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21204 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
21206 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
21209 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21210 vqadds_s32 (int32x1_t __a, int32x1_t __b)
21212 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
21215 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21216 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
21218 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
21221 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21222 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
21224 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
21227 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21228 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
21230 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
21233 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21234 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
21236 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
21239 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21240 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
21242 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
21245 /* vqdmlal */
21247 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21248 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21250 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21253 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21254 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21256 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21259 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21260 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21261 int const __d)
21263 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21266 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21267 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21268 int const __d)
21270 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21273 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21274 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21276 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21279 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21280 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21282 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
21283 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
21286 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21287 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21289 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21292 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21293 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21295 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21298 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21299 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21301 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21304 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21305 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21307 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21310 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21311 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21312 int const __d)
21314 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21317 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21318 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21319 int const __d)
21321 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21324 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21325 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21327 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21330 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21331 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21333 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
21334 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
21337 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21338 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21340 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21343 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21344 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21346 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21349 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21350 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21352 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21355 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21356 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21358 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21361 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21362 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21364 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21367 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21368 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21370 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21373 /* vqdmlsl */
21375 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21376 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21378 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
21381 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21382 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21384 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
21387 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21388 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21389 int const __d)
21391 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
21394 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21395 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21396 int const __d)
21398 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
21401 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21402 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21404 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
21407 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21408 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21410 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
21411 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
21414 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21415 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21417 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
21420 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21421 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21423 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
21426 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21427 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21429 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
21432 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21433 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21435 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
21438 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21439 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21440 int const __d)
21442 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
21445 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21446 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21447 int const __d)
21449 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
21452 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21453 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21455 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
21458 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21459 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21461 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
21462 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
21465 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21466 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21468 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
21471 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21472 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21474 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
21477 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21478 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21480 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
21483 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21484 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21486 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
21489 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21490 vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21492 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
21495 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21496 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21498 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
21501 /* vqdmulh */
21503 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21504 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21506 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
21509 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21510 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21512 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
21515 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21516 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21518 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
21521 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21522 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21524 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
21527 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21528 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21530 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
21533 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21534 vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21536 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
21539 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21540 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21542 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
21545 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21546 vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21548 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
21551 /* vqdmull */
21553 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21554 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
21556 return __builtin_aarch64_sqdmullv4hi (__a, __b);
21559 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21560 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
21562 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
21565 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21566 vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21568 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
21571 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21572 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21574 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
21577 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21578 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
21580 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
21583 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21584 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
21586 int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
21587 return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
21590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21591 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
21593 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
21596 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21597 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
21599 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
21602 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21603 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
21605 return __builtin_aarch64_sqdmullv2si (__a, __b);
21608 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21609 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
21611 return __builtin_aarch64_sqdmull2v4si (__a, __b);
21614 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21615 vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21617 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
21620 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21621 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21623 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
21626 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21627 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
21629 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
21632 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21633 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
21635 int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
21636 return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
21639 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21640 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
21642 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
21645 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21646 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
21648 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
21651 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21652 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
21654 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
21657 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21658 vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21660 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
21663 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21664 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
21666 return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
21669 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21670 vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21672 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
21675 /* vqmovn */
21677 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21678 vqmovn_s16 (int16x8_t __a)
21680 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
21683 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21684 vqmovn_s32 (int32x4_t __a)
21686 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
21689 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21690 vqmovn_s64 (int64x2_t __a)
21692 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
21695 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21696 vqmovn_u16 (uint16x8_t __a)
21698 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
21701 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21702 vqmovn_u32 (uint32x4_t __a)
21704 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
21707 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21708 vqmovn_u64 (uint64x2_t __a)
21710 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
21713 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21714 vqmovnh_s16 (int16x1_t __a)
21716 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
21719 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21720 vqmovns_s32 (int32x1_t __a)
21722 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
21725 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21726 vqmovnd_s64 (int64x1_t __a)
21728 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
21731 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21732 vqmovnh_u16 (uint16x1_t __a)
21734 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
21737 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21738 vqmovns_u32 (uint32x1_t __a)
21740 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
21743 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21744 vqmovnd_u64 (uint64x1_t __a)
21746 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
21749 /* vqmovun */
21751 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21752 vqmovun_s16 (int16x8_t __a)
21754 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
21757 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21758 vqmovun_s32 (int32x4_t __a)
21760 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
21763 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21764 vqmovun_s64 (int64x2_t __a)
21766 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
21769 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21770 vqmovunh_s16 (int16x1_t __a)
21772 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
21775 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21776 vqmovuns_s32 (int32x1_t __a)
21778 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
21781 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21782 vqmovund_s64 (int64x1_t __a)
21784 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
21787 /* vqneg */
21789 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21790 vqnegq_s64 (int64x2_t __a)
21792 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
21795 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21796 vqnegb_s8 (int8x1_t __a)
21798 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
21801 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21802 vqnegh_s16 (int16x1_t __a)
21804 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
21807 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21808 vqnegs_s32 (int32x1_t __a)
21810 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
21813 /* vqrdmulh */
21815 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21816 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21818 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
21821 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21822 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21824 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
21827 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21828 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21830 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
21833 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21834 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21836 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
21839 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21840 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21842 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
21845 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21846 vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21848 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
21851 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21852 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21854 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
21857 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21858 vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21860 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
21863 /* vqrshl */
21865 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21866 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
21868 return __builtin_aarch64_sqrshlv8qi (__a, __b);
21871 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21872 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
21874 return __builtin_aarch64_sqrshlv4hi (__a, __b);
21877 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21878 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
21880 return __builtin_aarch64_sqrshlv2si (__a, __b);
21883 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21884 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
21886 return __builtin_aarch64_sqrshldi (__a, __b);
21889 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21890 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
21892 return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
21895 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21896 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
21898 return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
21901 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21902 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
21904 return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
21907 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21908 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
21910 return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
21913 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21914 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
21916 return __builtin_aarch64_sqrshlv16qi (__a, __b);
21919 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21920 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
21922 return __builtin_aarch64_sqrshlv8hi (__a, __b);
21925 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21926 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
21928 return __builtin_aarch64_sqrshlv4si (__a, __b);
21931 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21932 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
21934 return __builtin_aarch64_sqrshlv2di (__a, __b);
21937 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21938 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21940 return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
21943 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21944 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21946 return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
21949 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21950 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21952 return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
21955 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21956 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21958 return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
21961 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21962 vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
21964 return __builtin_aarch64_sqrshlqi (__a, __b);
21967 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21968 vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
21970 return __builtin_aarch64_sqrshlhi (__a, __b);
21973 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21974 vqrshls_s32 (int32x1_t __a, int32x1_t __b)
21976 return __builtin_aarch64_sqrshlsi (__a, __b);
21979 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21980 vqrshld_s64 (int64x1_t __a, int64x1_t __b)
21982 return __builtin_aarch64_sqrshldi (__a, __b);
21985 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21986 vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
21988 return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
21991 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21992 vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
21994 return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
21997 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21998 vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
22000 return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
22003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22004 vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22006 return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
22009 /* vqrshrn */
22011 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22012 vqrshrn_n_s16 (int16x8_t __a, const int __b)
22014 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
22017 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22018 vqrshrn_n_s32 (int32x4_t __a, const int __b)
22020 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
22023 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22024 vqrshrn_n_s64 (int64x2_t __a, const int __b)
22026 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
22029 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22030 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
22032 return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
22035 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22036 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
22038 return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
22041 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22042 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
22044 return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
22047 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22048 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
22050 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
22053 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22054 vqrshrns_n_s32 (int32x1_t __a, const int __b)
22056 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
22059 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22060 vqrshrnd_n_s64 (int64x1_t __a, const int __b)
22062 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
22065 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22066 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
22068 return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
22071 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22072 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
22074 return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
22077 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22078 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
22080 return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
22083 /* vqrshrun */
22085 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22086 vqrshrun_n_s16 (int16x8_t __a, const int __b)
22088 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
22091 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22092 vqrshrun_n_s32 (int32x4_t __a, const int __b)
22094 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
22097 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22098 vqrshrun_n_s64 (int64x2_t __a, const int __b)
22100 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
22103 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22104 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
22106 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
22109 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22110 vqrshruns_n_s32 (int32x1_t __a, const int __b)
22112 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
22115 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22116 vqrshrund_n_s64 (int64x1_t __a, const int __b)
22118 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
22121 /* vqshl */
22123 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22124 vqshl_s8 (int8x8_t __a, int8x8_t __b)
22126 return __builtin_aarch64_sqshlv8qi (__a, __b);
22129 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22130 vqshl_s16 (int16x4_t __a, int16x4_t __b)
22132 return __builtin_aarch64_sqshlv4hi (__a, __b);
22135 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22136 vqshl_s32 (int32x2_t __a, int32x2_t __b)
22138 return __builtin_aarch64_sqshlv2si (__a, __b);
22141 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22142 vqshl_s64 (int64x1_t __a, int64x1_t __b)
22144 return __builtin_aarch64_sqshldi (__a, __b);
22147 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22148 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
22150 return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
22153 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22154 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
22156 return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
22159 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22160 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
22162 return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
22165 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22166 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
22168 return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
22171 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22172 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
22174 return __builtin_aarch64_sqshlv16qi (__a, __b);
22177 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22178 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
22180 return __builtin_aarch64_sqshlv8hi (__a, __b);
22183 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22184 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
22186 return __builtin_aarch64_sqshlv4si (__a, __b);
22189 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22190 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
22192 return __builtin_aarch64_sqshlv2di (__a, __b);
22195 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22196 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
22198 return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
22201 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22202 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
22204 return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
22207 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22208 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
22210 return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
22213 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22214 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
22216 return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
22219 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22220 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
22222 return __builtin_aarch64_sqshlqi (__a, __b);
22225 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22226 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
22228 return __builtin_aarch64_sqshlhi (__a, __b);
22231 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22232 vqshls_s32 (int32x1_t __a, int32x1_t __b)
22234 return __builtin_aarch64_sqshlsi (__a, __b);
22237 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22238 vqshld_s64 (int64x1_t __a, int64x1_t __b)
22240 return __builtin_aarch64_sqshldi (__a, __b);
22243 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22244 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22246 return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
22249 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22250 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22252 return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
22255 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22256 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22258 return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22261 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22262 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22264 return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22267 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22268 vqshl_n_s8 (int8x8_t __a, const int __b)
22270 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22273 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22274 vqshl_n_s16 (int16x4_t __a, const int __b)
22276 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22279 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22280 vqshl_n_s32 (int32x2_t __a, const int __b)
22282 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22285 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22286 vqshl_n_s64 (int64x1_t __a, const int __b)
22288 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22291 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22292 vqshl_n_u8 (uint8x8_t __a, const int __b)
22294 return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22297 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22298 vqshl_n_u16 (uint16x4_t __a, const int __b)
22300 return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22303 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22304 vqshl_n_u32 (uint32x2_t __a, const int __b)
22306 return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22309 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22310 vqshl_n_u64 (uint64x1_t __a, const int __b)
22312 return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22315 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22316 vqshlq_n_s8 (int8x16_t __a, const int __b)
22318 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22321 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22322 vqshlq_n_s16 (int16x8_t __a, const int __b)
22324 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22327 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22328 vqshlq_n_s32 (int32x4_t __a, const int __b)
22330 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22333 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22334 vqshlq_n_s64 (int64x2_t __a, const int __b)
22336 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22339 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22340 vqshlq_n_u8 (uint8x16_t __a, const int __b)
22342 return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22345 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22346 vqshlq_n_u16 (uint16x8_t __a, const int __b)
22348 return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22351 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22352 vqshlq_n_u32 (uint32x4_t __a, const int __b)
22354 return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22357 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22358 vqshlq_n_u64 (uint64x2_t __a, const int __b)
22360 return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22363 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22364 vqshlb_n_s8 (int8x1_t __a, const int __b)
22366 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22369 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22370 vqshlh_n_s16 (int16x1_t __a, const int __b)
22372 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22375 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22376 vqshls_n_s32 (int32x1_t __a, const int __b)
22378 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22381 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22382 vqshld_n_s64 (int64x1_t __a, const int __b)
22384 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22387 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22388 vqshlb_n_u8 (uint8x1_t __a, const int __b)
22390 return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22393 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22394 vqshlh_n_u16 (uint16x1_t __a, const int __b)
22396 return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22399 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22400 vqshls_n_u32 (uint32x1_t __a, const int __b)
22402 return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22405 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22406 vqshld_n_u64 (uint64x1_t __a, const int __b)
22408 return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22411 /* vqshlu */
22413 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22414 vqshlu_n_s8 (int8x8_t __a, const int __b)
22416 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22419 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22420 vqshlu_n_s16 (int16x4_t __a, const int __b)
22422 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22425 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22426 vqshlu_n_s32 (int32x2_t __a, const int __b)
22428 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22431 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22432 vqshlu_n_s64 (int64x1_t __a, const int __b)
22434 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22437 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22438 vqshluq_n_s8 (int8x16_t __a, const int __b)
22440 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22443 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22444 vqshluq_n_s16 (int16x8_t __a, const int __b)
22446 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22450 vqshluq_n_s32 (int32x4_t __a, const int __b)
22452 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22455 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22456 vqshluq_n_s64 (int64x2_t __a, const int __b)
22458 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22461 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22462 vqshlub_n_s8 (int8x1_t __a, const int __b)
22464 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22467 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22468 vqshluh_n_s16 (int16x1_t __a, const int __b)
22470 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22473 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22474 vqshlus_n_s32 (int32x1_t __a, const int __b)
22476 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22479 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22480 vqshlud_n_s64 (int64x1_t __a, const int __b)
22482 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22485 /* vqshrn */
22487 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22488 vqshrn_n_s16 (int16x8_t __a, const int __b)
22490 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22493 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22494 vqshrn_n_s32 (int32x4_t __a, const int __b)
22496 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22499 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22500 vqshrn_n_s64 (int64x2_t __a, const int __b)
22502 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22505 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22506 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22508 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22511 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22512 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22514 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22517 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22518 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22520 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22523 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22524 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22526 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22529 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22530 vqshrns_n_s32 (int32x1_t __a, const int __b)
22532 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22535 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22536 vqshrnd_n_s64 (int64x1_t __a, const int __b)
22538 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22541 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22542 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22544 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22547 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22548 vqshrns_n_u32 (uint32x1_t __a, const int __b)
22550 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22553 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22554 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22556 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22559 /* vqshrun */
22561 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22562 vqshrun_n_s16 (int16x8_t __a, const int __b)
22564 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22567 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22568 vqshrun_n_s32 (int32x4_t __a, const int __b)
22570 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22573 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22574 vqshrun_n_s64 (int64x2_t __a, const int __b)
22576 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22579 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22580 vqshrunh_n_s16 (int16x1_t __a, const int __b)
22582 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22585 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22586 vqshruns_n_s32 (int32x1_t __a, const int __b)
22588 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22591 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22592 vqshrund_n_s64 (int64x1_t __a, const int __b)
22594 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22597 /* vqsub */
22599 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22600 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22602 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22605 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22606 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22608 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22611 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22612 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22614 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22617 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22618 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22620 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22623 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22624 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22626 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22629 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22630 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22632 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22635 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22636 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22638 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22641 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22642 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22644 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
22647 /* vrecpe */
22649 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22650 vrecpes_f32 (float32_t __a)
22652 return __builtin_aarch64_frecpesf (__a);
22655 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22656 vrecped_f64 (float64_t __a)
22658 return __builtin_aarch64_frecpedf (__a);
22661 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22662 vrecpe_f32 (float32x2_t __a)
22664 return __builtin_aarch64_frecpev2sf (__a);
22667 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22668 vrecpeq_f32 (float32x4_t __a)
22670 return __builtin_aarch64_frecpev4sf (__a);
22673 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22674 vrecpeq_f64 (float64x2_t __a)
22676 return __builtin_aarch64_frecpev2df (__a);
22679 /* vrecps */
22681 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22682 vrecpss_f32 (float32_t __a, float32_t __b)
22684 return __builtin_aarch64_frecpssf (__a, __b);
22687 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22688 vrecpsd_f64 (float64_t __a, float64_t __b)
22690 return __builtin_aarch64_frecpsdf (__a, __b);
22693 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22694 vrecps_f32 (float32x2_t __a, float32x2_t __b)
22696 return __builtin_aarch64_frecpsv2sf (__a, __b);
22699 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22700 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
22702 return __builtin_aarch64_frecpsv4sf (__a, __b);
22705 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22706 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
22708 return __builtin_aarch64_frecpsv2df (__a, __b);
22711 /* vrecpx */
22713 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22714 vrecpxs_f32 (float32_t __a)
22716 return __builtin_aarch64_frecpxsf (__a);
22719 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22720 vrecpxd_f64 (float64_t __a)
22722 return __builtin_aarch64_frecpxdf (__a);
22725 /* vrnd */
22727 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22728 vrnd_f32 (float32x2_t __a)
22730 return __builtin_aarch64_btruncv2sf (__a);
22733 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22734 vrndq_f32 (float32x4_t __a)
22736 return __builtin_aarch64_btruncv4sf (__a);
22739 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22740 vrndq_f64 (float64x2_t __a)
22742 return __builtin_aarch64_btruncv2df (__a);
22745 /* vrnda */
22747 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22748 vrnda_f32 (float32x2_t __a)
22750 return __builtin_aarch64_roundv2sf (__a);
22753 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22754 vrndaq_f32 (float32x4_t __a)
22756 return __builtin_aarch64_roundv4sf (__a);
22759 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22760 vrndaq_f64 (float64x2_t __a)
22762 return __builtin_aarch64_roundv2df (__a);
22765 /* vrndi */
22767 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22768 vrndi_f32 (float32x2_t __a)
22770 return __builtin_aarch64_nearbyintv2sf (__a);
22773 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22774 vrndiq_f32 (float32x4_t __a)
22776 return __builtin_aarch64_nearbyintv4sf (__a);
22779 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22780 vrndiq_f64 (float64x2_t __a)
22782 return __builtin_aarch64_nearbyintv2df (__a);
22785 /* vrndm */
22787 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22788 vrndm_f32 (float32x2_t __a)
22790 return __builtin_aarch64_floorv2sf (__a);
22793 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22794 vrndmq_f32 (float32x4_t __a)
22796 return __builtin_aarch64_floorv4sf (__a);
22799 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22800 vrndmq_f64 (float64x2_t __a)
22802 return __builtin_aarch64_floorv2df (__a);
22805 /* vrndn */
22807 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22808 vrndn_f32 (float32x2_t __a)
22810 return __builtin_aarch64_frintnv2sf (__a);
22812 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22813 vrndnq_f32 (float32x4_t __a)
22815 return __builtin_aarch64_frintnv4sf (__a);
22818 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22819 vrndnq_f64 (float64x2_t __a)
22821 return __builtin_aarch64_frintnv2df (__a);
22824 /* vrndp */
22826 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22827 vrndp_f32 (float32x2_t __a)
22829 return __builtin_aarch64_ceilv2sf (__a);
22832 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22833 vrndpq_f32 (float32x4_t __a)
22835 return __builtin_aarch64_ceilv4sf (__a);
22838 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22839 vrndpq_f64 (float64x2_t __a)
22841 return __builtin_aarch64_ceilv2df (__a);
22844 /* vrndx */
22846 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22847 vrndx_f32 (float32x2_t __a)
22849 return __builtin_aarch64_rintv2sf (__a);
22852 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22853 vrndxq_f32 (float32x4_t __a)
22855 return __builtin_aarch64_rintv4sf (__a);
22858 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22859 vrndxq_f64 (float64x2_t __a)
22861 return __builtin_aarch64_rintv2df (__a);
22864 /* vrshl */
22866 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22867 vrshl_s8 (int8x8_t __a, int8x8_t __b)
22869 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
22872 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22873 vrshl_s16 (int16x4_t __a, int16x4_t __b)
22875 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
22878 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22879 vrshl_s32 (int32x2_t __a, int32x2_t __b)
22881 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
22884 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22885 vrshl_s64 (int64x1_t __a, int64x1_t __b)
22887 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22890 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22891 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
22893 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
22896 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22897 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
22899 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
22902 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22903 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
22905 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
22908 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22909 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
22911 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
22914 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22915 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
22917 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
22920 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22921 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
22923 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
22926 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22927 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
22929 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
22932 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22933 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
22935 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
22938 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22939 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22941 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
22944 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22945 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22947 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
22950 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22951 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22953 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
22956 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22957 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22959 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
22962 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22963 vrshld_s64 (int64x1_t __a, int64x1_t __b)
22965 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22968 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22969 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22971 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
22974 /* vrshr */
22976 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22977 vrshr_n_s8 (int8x8_t __a, const int __b)
22979 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
22982 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22983 vrshr_n_s16 (int16x4_t __a, const int __b)
22985 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
22988 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22989 vrshr_n_s32 (int32x2_t __a, const int __b)
22991 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
22994 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22995 vrshr_n_s64 (int64x1_t __a, const int __b)
22997 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23000 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23001 vrshr_n_u8 (uint8x8_t __a, const int __b)
23003 return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
23006 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23007 vrshr_n_u16 (uint16x4_t __a, const int __b)
23009 return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
23012 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23013 vrshr_n_u32 (uint32x2_t __a, const int __b)
23015 return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
23018 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23019 vrshr_n_u64 (uint64x1_t __a, const int __b)
23021 return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
23024 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23025 vrshrq_n_s8 (int8x16_t __a, const int __b)
23027 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
23030 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23031 vrshrq_n_s16 (int16x8_t __a, const int __b)
23033 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
23036 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23037 vrshrq_n_s32 (int32x4_t __a, const int __b)
23039 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
23042 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23043 vrshrq_n_s64 (int64x2_t __a, const int __b)
23045 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
23048 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23049 vrshrq_n_u8 (uint8x16_t __a, const int __b)
23051 return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
23054 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23055 vrshrq_n_u16 (uint16x8_t __a, const int __b)
23057 return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
23060 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23061 vrshrq_n_u32 (uint32x4_t __a, const int __b)
23063 return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
23066 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23067 vrshrq_n_u64 (uint64x2_t __a, const int __b)
23069 return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
23072 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23073 vrshrd_n_s64 (int64x1_t __a, const int __b)
23075 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23078 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23079 vrshrd_n_u64 (uint64x1_t __a, const int __b)
23081 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
23084 /* vrsra */
23086 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23087 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23089 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
23092 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23093 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23095 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
23098 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23099 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23101 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
23104 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23105 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23107 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23110 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23111 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23113 return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
23114 (int8x8_t) __b, __c);
23117 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23118 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23120 return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
23121 (int16x4_t) __b, __c);
23124 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23125 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23127 return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
23128 (int32x2_t) __b, __c);
23131 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23132 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23134 return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
23135 (int64x1_t) __b, __c);
23138 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23139 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23141 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
23144 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23145 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23147 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
23150 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23151 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23153 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
23156 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23157 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23159 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
23162 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23163 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23165 return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
23166 (int8x16_t) __b, __c);
23169 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23170 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23172 return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
23173 (int16x8_t) __b, __c);
23176 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23177 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23179 return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
23180 (int32x4_t) __b, __c);
23183 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23184 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23186 return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
23187 (int64x2_t) __b, __c);
23190 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23191 vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23193 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23196 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23197 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23199 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
23202 /* vshl */
23204 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23205 vshl_n_s8 (int8x8_t __a, const int __b)
23207 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
23210 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23211 vshl_n_s16 (int16x4_t __a, const int __b)
23213 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
23216 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23217 vshl_n_s32 (int32x2_t __a, const int __b)
23219 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
23222 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23223 vshl_n_s64 (int64x1_t __a, const int __b)
23225 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23228 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23229 vshl_n_u8 (uint8x8_t __a, const int __b)
23231 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
23234 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23235 vshl_n_u16 (uint16x4_t __a, const int __b)
23237 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
23240 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23241 vshl_n_u32 (uint32x2_t __a, const int __b)
23243 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
23246 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23247 vshl_n_u64 (uint64x1_t __a, const int __b)
23249 return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
23252 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23253 vshlq_n_s8 (int8x16_t __a, const int __b)
23255 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
23258 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23259 vshlq_n_s16 (int16x8_t __a, const int __b)
23261 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
23264 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23265 vshlq_n_s32 (int32x4_t __a, const int __b)
23267 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
23270 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23271 vshlq_n_s64 (int64x2_t __a, const int __b)
23273 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
23276 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23277 vshlq_n_u8 (uint8x16_t __a, const int __b)
23279 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
23282 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23283 vshlq_n_u16 (uint16x8_t __a, const int __b)
23285 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
23288 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23289 vshlq_n_u32 (uint32x4_t __a, const int __b)
23291 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
23294 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23295 vshlq_n_u64 (uint64x2_t __a, const int __b)
23297 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
23300 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23301 vshld_n_s64 (int64x1_t __a, const int __b)
23303 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23306 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23307 vshld_n_u64 (uint64x1_t __a, const int __b)
23309 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
23312 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23313 vshl_s8 (int8x8_t __a, int8x8_t __b)
23315 return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23318 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23319 vshl_s16 (int16x4_t __a, int16x4_t __b)
23321 return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23324 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23325 vshl_s32 (int32x2_t __a, int32x2_t __b)
23327 return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23330 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23331 vshl_s64 (int64x1_t __a, int64x1_t __b)
23333 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23336 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23337 vshl_u8 (uint8x8_t __a, int8x8_t __b)
23339 return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23342 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23343 vshl_u16 (uint16x4_t __a, int16x4_t __b)
23345 return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23348 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23349 vshl_u32 (uint32x2_t __a, int32x2_t __b)
23351 return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23354 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23355 vshl_u64 (uint64x1_t __a, int64x1_t __b)
23357 return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23360 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23361 vshlq_s8 (int8x16_t __a, int8x16_t __b)
23363 return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23366 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23367 vshlq_s16 (int16x8_t __a, int16x8_t __b)
23369 return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23372 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23373 vshlq_s32 (int32x4_t __a, int32x4_t __b)
23375 return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23378 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23379 vshlq_s64 (int64x2_t __a, int64x2_t __b)
23381 return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23384 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23385 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23387 return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23390 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23391 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23393 return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23396 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23397 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23399 return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23402 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23403 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23405 return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23408 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23409 vshld_s64 (int64x1_t __a, int64x1_t __b)
23411 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23414 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23415 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23417 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23420 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23421 vshll_high_n_s8 (int8x16_t __a, const int __b)
23423 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23426 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23427 vshll_high_n_s16 (int16x8_t __a, const int __b)
23429 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23432 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23433 vshll_high_n_s32 (int32x4_t __a, const int __b)
23435 return __builtin_aarch64_sshll2_nv4si (__a, __b);
23438 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23439 vshll_high_n_u8 (uint8x16_t __a, const int __b)
23441 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23444 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23445 vshll_high_n_u16 (uint16x8_t __a, const int __b)
23447 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23450 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23451 vshll_high_n_u32 (uint32x4_t __a, const int __b)
23453 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23456 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23457 vshll_n_s8 (int8x8_t __a, const int __b)
23459 return __builtin_aarch64_sshll_nv8qi (__a, __b);
23462 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23463 vshll_n_s16 (int16x4_t __a, const int __b)
23465 return __builtin_aarch64_sshll_nv4hi (__a, __b);
23468 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23469 vshll_n_s32 (int32x2_t __a, const int __b)
23471 return __builtin_aarch64_sshll_nv2si (__a, __b);
23474 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23475 vshll_n_u8 (uint8x8_t __a, const int __b)
23477 return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23480 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23481 vshll_n_u16 (uint16x4_t __a, const int __b)
23483 return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23486 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23487 vshll_n_u32 (uint32x2_t __a, const int __b)
23489 return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
23492 /* vshr */
23494 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23495 vshr_n_s8 (int8x8_t __a, const int __b)
23497 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
23500 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23501 vshr_n_s16 (int16x4_t __a, const int __b)
23503 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
23506 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23507 vshr_n_s32 (int32x2_t __a, const int __b)
23509 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
23512 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23513 vshr_n_s64 (int64x1_t __a, const int __b)
23515 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23518 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23519 vshr_n_u8 (uint8x8_t __a, const int __b)
23521 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
23524 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23525 vshr_n_u16 (uint16x4_t __a, const int __b)
23527 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
23530 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23531 vshr_n_u32 (uint32x2_t __a, const int __b)
23533 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
23536 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23537 vshr_n_u64 (uint64x1_t __a, const int __b)
23539 return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
23542 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23543 vshrq_n_s8 (int8x16_t __a, const int __b)
23545 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
23548 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23549 vshrq_n_s16 (int16x8_t __a, const int __b)
23551 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
23554 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23555 vshrq_n_s32 (int32x4_t __a, const int __b)
23557 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
23560 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23561 vshrq_n_s64 (int64x2_t __a, const int __b)
23563 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
23566 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23567 vshrq_n_u8 (uint8x16_t __a, const int __b)
23569 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
23572 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23573 vshrq_n_u16 (uint16x8_t __a, const int __b)
23575 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
23578 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23579 vshrq_n_u32 (uint32x4_t __a, const int __b)
23581 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
23584 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23585 vshrq_n_u64 (uint64x2_t __a, const int __b)
23587 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
23590 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23591 vshrd_n_s64 (int64x1_t __a, const int __b)
23593 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23596 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23597 vshrd_n_u64 (uint64x1_t __a, const int __b)
23599 return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
23602 /* vsli */
23604 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23605 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23607 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
23610 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23611 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23613 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
23616 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23617 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23619 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
23622 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23623 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23625 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23628 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23629 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23631 return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
23632 (int8x8_t) __b, __c);
23635 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23636 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23638 return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
23639 (int16x4_t) __b, __c);
23642 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23643 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23645 return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
23646 (int32x2_t) __b, __c);
23649 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23650 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23652 return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
23653 (int64x1_t) __b, __c);
23656 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23657 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23659 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
23662 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23663 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23665 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
23668 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23669 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23671 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
23674 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23675 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23677 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
23680 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23681 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23683 return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
23684 (int8x16_t) __b, __c);
23687 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23688 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23690 return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
23691 (int16x8_t) __b, __c);
23694 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23695 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23697 return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
23698 (int32x4_t) __b, __c);
23701 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23702 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23704 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23705 (int64x2_t) __b, __c);
23708 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23709 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23711 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23714 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23715 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23717 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23720 /* vsqadd */
23722 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23723 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23725 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23726 (int8x8_t) __b);
23729 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23730 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23732 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23733 (int16x4_t) __b);
23736 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23737 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23739 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23740 (int32x2_t) __b);
23743 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23744 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23746 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23749 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23750 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
23752 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
23753 (int8x16_t) __b);
23756 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23757 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
23759 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
23760 (int16x8_t) __b);
23763 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23764 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
23766 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
23767 (int32x4_t) __b);
23770 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23771 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
23773 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
23774 (int64x2_t) __b);
23777 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
23778 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
23780 return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
23783 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
23784 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
23786 return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
23789 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
23790 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
23792 return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
23795 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23796 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
23798 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23801 /* vsqrt */
23802 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23803 vsqrt_f32 (float32x2_t a)
23805 return __builtin_aarch64_sqrtv2sf (a);
23808 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23809 vsqrtq_f32 (float32x4_t a)
23811 return __builtin_aarch64_sqrtv4sf (a);
23814 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23815 vsqrtq_f64 (float64x2_t a)
23817 return __builtin_aarch64_sqrtv2df (a);
23820 /* vsra */
23822 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23823 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23825 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
23828 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23829 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23831 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
23834 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23835 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23837 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
23840 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23841 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23843 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23846 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23847 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23849 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
23850 (int8x8_t) __b, __c);
23853 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23854 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23856 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
23857 (int16x4_t) __b, __c);
23860 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23861 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23863 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
23864 (int32x2_t) __b, __c);
23867 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23868 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23870 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
23871 (int64x1_t) __b, __c);
23874 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23875 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23877 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
23880 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23881 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23883 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
23886 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23887 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23889 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
23892 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23893 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23895 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
23898 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23899 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23901 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
23902 (int8x16_t) __b, __c);
23905 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23906 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23908 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
23909 (int16x8_t) __b, __c);
23912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23913 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23915 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
23916 (int32x4_t) __b, __c);
23919 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23920 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23922 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
23923 (int64x2_t) __b, __c);
23926 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23927 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23929 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23932 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23933 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23935 return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
23938 /* vsri */
23940 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23941 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23943 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
23946 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23947 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23949 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
23952 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23953 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23955 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
23958 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23959 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23961 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
23964 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23965 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23967 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
23968 (int8x8_t) __b, __c);
23971 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23972 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23974 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
23975 (int16x4_t) __b, __c);
23978 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23979 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23981 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
23982 (int32x2_t) __b, __c);
23985 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23986 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23988 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
23989 (int64x1_t) __b, __c);
23992 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23993 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23995 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
23998 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23999 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24001 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
24004 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24005 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24007 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
24010 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24011 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24013 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
24016 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24017 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24019 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
24020 (int8x16_t) __b, __c);
24023 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24024 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24026 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
24027 (int16x8_t) __b, __c);
24030 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24031 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24033 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
24034 (int32x4_t) __b, __c);
24037 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24038 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24040 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
24041 (int64x2_t) __b, __c);
24044 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24045 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24047 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24050 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24051 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24053 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
24056 /* vst1 */
24058 __extension__ static __inline void __attribute__ ((__always_inline__))
24059 vst1_f32 (float32_t *a, float32x2_t b)
24061 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
24064 __extension__ static __inline void __attribute__ ((__always_inline__))
24065 vst1_f64 (float64_t *a, float64x1_t b)
24067 *a = b;
24070 __extension__ static __inline void __attribute__ ((__always_inline__))
24071 vst1_p8 (poly8_t *a, poly8x8_t b)
24073 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24074 (int8x8_t) b);
24077 __extension__ static __inline void __attribute__ ((__always_inline__))
24078 vst1_p16 (poly16_t *a, poly16x4_t b)
24080 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24081 (int16x4_t) b);
24084 __extension__ static __inline void __attribute__ ((__always_inline__))
24085 vst1_s8 (int8_t *a, int8x8_t b)
24087 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
24090 __extension__ static __inline void __attribute__ ((__always_inline__))
24091 vst1_s16 (int16_t *a, int16x4_t b)
24093 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
24096 __extension__ static __inline void __attribute__ ((__always_inline__))
24097 vst1_s32 (int32_t *a, int32x2_t b)
24099 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
24102 __extension__ static __inline void __attribute__ ((__always_inline__))
24103 vst1_s64 (int64_t *a, int64x1_t b)
24105 *a = b;
24108 __extension__ static __inline void __attribute__ ((__always_inline__))
24109 vst1_u8 (uint8_t *a, uint8x8_t b)
24111 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24112 (int8x8_t) b);
24115 __extension__ static __inline void __attribute__ ((__always_inline__))
24116 vst1_u16 (uint16_t *a, uint16x4_t b)
24118 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24119 (int16x4_t) b);
24122 __extension__ static __inline void __attribute__ ((__always_inline__))
24123 vst1_u32 (uint32_t *a, uint32x2_t b)
24125 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
24126 (int32x2_t) b);
24129 __extension__ static __inline void __attribute__ ((__always_inline__))
24130 vst1_u64 (uint64_t *a, uint64x1_t b)
24132 *a = b;
24135 __extension__ static __inline void __attribute__ ((__always_inline__))
24136 vst1q_f32 (float32_t *a, float32x4_t b)
24138 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
24141 __extension__ static __inline void __attribute__ ((__always_inline__))
24142 vst1q_f64 (float64_t *a, float64x2_t b)
24144 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
24147 /* vst1q */
24149 __extension__ static __inline void __attribute__ ((__always_inline__))
24150 vst1q_p8 (poly8_t *a, poly8x16_t b)
24152 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24153 (int8x16_t) b);
24156 __extension__ static __inline void __attribute__ ((__always_inline__))
24157 vst1q_p16 (poly16_t *a, poly16x8_t b)
24159 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24160 (int16x8_t) b);
24163 __extension__ static __inline void __attribute__ ((__always_inline__))
24164 vst1q_s8 (int8_t *a, int8x16_t b)
24166 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
24169 __extension__ static __inline void __attribute__ ((__always_inline__))
24170 vst1q_s16 (int16_t *a, int16x8_t b)
24172 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
24175 __extension__ static __inline void __attribute__ ((__always_inline__))
24176 vst1q_s32 (int32_t *a, int32x4_t b)
24178 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
24181 __extension__ static __inline void __attribute__ ((__always_inline__))
24182 vst1q_s64 (int64_t *a, int64x2_t b)
24184 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
24187 __extension__ static __inline void __attribute__ ((__always_inline__))
24188 vst1q_u8 (uint8_t *a, uint8x16_t b)
24190 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24191 (int8x16_t) b);
24194 __extension__ static __inline void __attribute__ ((__always_inline__))
24195 vst1q_u16 (uint16_t *a, uint16x8_t b)
24197 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24198 (int16x8_t) b);
24201 __extension__ static __inline void __attribute__ ((__always_inline__))
24202 vst1q_u32 (uint32_t *a, uint32x4_t b)
24204 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
24205 (int32x4_t) b);
24208 __extension__ static __inline void __attribute__ ((__always_inline__))
24209 vst1q_u64 (uint64_t *a, uint64x2_t b)
24211 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
24212 (int64x2_t) b);
24215 /* vstn */
24217 __extension__ static __inline void
24218 vst2_s64 (int64_t * __a, int64x1x2_t val)
24220 __builtin_aarch64_simd_oi __o;
24221 int64x2x2_t temp;
24222 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24223 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24224 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24225 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24226 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24229 __extension__ static __inline void
24230 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24232 __builtin_aarch64_simd_oi __o;
24233 uint64x2x2_t temp;
24234 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24235 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24236 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24237 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24238 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24241 __extension__ static __inline void
24242 vst2_f64 (float64_t * __a, float64x1x2_t val)
24244 __builtin_aarch64_simd_oi __o;
24245 float64x2x2_t temp;
24246 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24247 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24248 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24249 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24250 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24253 __extension__ static __inline void
24254 vst2_s8 (int8_t * __a, int8x8x2_t val)
24256 __builtin_aarch64_simd_oi __o;
24257 int8x16x2_t temp;
24258 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24259 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24260 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24261 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24262 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24265 __extension__ static __inline void __attribute__ ((__always_inline__))
24266 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24268 __builtin_aarch64_simd_oi __o;
24269 poly8x16x2_t temp;
24270 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24271 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24272 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24273 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24274 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24277 __extension__ static __inline void __attribute__ ((__always_inline__))
24278 vst2_s16 (int16_t * __a, int16x4x2_t val)
24280 __builtin_aarch64_simd_oi __o;
24281 int16x8x2_t temp;
24282 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24283 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24284 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24285 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24286 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24289 __extension__ static __inline void __attribute__ ((__always_inline__))
24290 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24292 __builtin_aarch64_simd_oi __o;
24293 poly16x8x2_t temp;
24294 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24295 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24296 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24297 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24298 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24301 __extension__ static __inline void __attribute__ ((__always_inline__))
24302 vst2_s32 (int32_t * __a, int32x2x2_t val)
24304 __builtin_aarch64_simd_oi __o;
24305 int32x4x2_t temp;
24306 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24307 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24308 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24309 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24310 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24313 __extension__ static __inline void __attribute__ ((__always_inline__))
24314 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24316 __builtin_aarch64_simd_oi __o;
24317 uint8x16x2_t temp;
24318 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24319 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24320 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24321 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24322 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24325 __extension__ static __inline void __attribute__ ((__always_inline__))
24326 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24328 __builtin_aarch64_simd_oi __o;
24329 uint16x8x2_t temp;
24330 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24331 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24332 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24333 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24334 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24337 __extension__ static __inline void __attribute__ ((__always_inline__))
24338 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24340 __builtin_aarch64_simd_oi __o;
24341 uint32x4x2_t temp;
24342 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24343 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24344 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24345 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24346 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24349 __extension__ static __inline void __attribute__ ((__always_inline__))
24350 vst2_f32 (float32_t * __a, float32x2x2_t val)
24352 __builtin_aarch64_simd_oi __o;
24353 float32x4x2_t temp;
24354 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24355 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24356 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24357 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24358 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24361 __extension__ static __inline void __attribute__ ((__always_inline__))
24362 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24364 __builtin_aarch64_simd_oi __o;
24365 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24366 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24367 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24370 __extension__ static __inline void __attribute__ ((__always_inline__))
24371 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24373 __builtin_aarch64_simd_oi __o;
24374 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24375 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24376 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24379 __extension__ static __inline void __attribute__ ((__always_inline__))
24380 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24382 __builtin_aarch64_simd_oi __o;
24383 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24384 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24385 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24388 __extension__ static __inline void __attribute__ ((__always_inline__))
24389 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24391 __builtin_aarch64_simd_oi __o;
24392 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24393 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24394 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24397 __extension__ static __inline void __attribute__ ((__always_inline__))
24398 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24400 __builtin_aarch64_simd_oi __o;
24401 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24402 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24403 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24406 __extension__ static __inline void __attribute__ ((__always_inline__))
24407 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24409 __builtin_aarch64_simd_oi __o;
24410 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24411 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24412 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24415 __extension__ static __inline void __attribute__ ((__always_inline__))
24416 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24418 __builtin_aarch64_simd_oi __o;
24419 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24420 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24421 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24424 __extension__ static __inline void __attribute__ ((__always_inline__))
24425 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24427 __builtin_aarch64_simd_oi __o;
24428 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24429 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24430 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24433 __extension__ static __inline void __attribute__ ((__always_inline__))
24434 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24436 __builtin_aarch64_simd_oi __o;
24437 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24438 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24439 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24442 __extension__ static __inline void __attribute__ ((__always_inline__))
24443 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24445 __builtin_aarch64_simd_oi __o;
24446 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24447 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24448 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24451 __extension__ static __inline void __attribute__ ((__always_inline__))
24452 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24454 __builtin_aarch64_simd_oi __o;
24455 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24456 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24457 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24460 __extension__ static __inline void __attribute__ ((__always_inline__))
24461 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24463 __builtin_aarch64_simd_oi __o;
24464 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24465 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24466 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24469 __extension__ static __inline void
24470 vst3_s64 (int64_t * __a, int64x1x3_t val)
24472 __builtin_aarch64_simd_ci __o;
24473 int64x2x3_t temp;
24474 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24475 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24476 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24477 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24478 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24479 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24480 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24483 __extension__ static __inline void
24484 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24486 __builtin_aarch64_simd_ci __o;
24487 uint64x2x3_t temp;
24488 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24489 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24490 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24491 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24492 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24493 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24494 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24497 __extension__ static __inline void
24498 vst3_f64 (float64_t * __a, float64x1x3_t val)
24500 __builtin_aarch64_simd_ci __o;
24501 float64x2x3_t temp;
24502 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24503 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24504 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24505 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24506 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24507 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24508 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24511 __extension__ static __inline void
24512 vst3_s8 (int8_t * __a, int8x8x3_t val)
24514 __builtin_aarch64_simd_ci __o;
24515 int8x16x3_t temp;
24516 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24517 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24518 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24519 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24520 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24521 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24522 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24525 __extension__ static __inline void __attribute__ ((__always_inline__))
24526 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24528 __builtin_aarch64_simd_ci __o;
24529 poly8x16x3_t temp;
24530 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24531 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24532 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24533 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24534 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24535 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24536 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24539 __extension__ static __inline void __attribute__ ((__always_inline__))
24540 vst3_s16 (int16_t * __a, int16x4x3_t val)
24542 __builtin_aarch64_simd_ci __o;
24543 int16x8x3_t temp;
24544 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24545 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24546 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24547 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24548 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24549 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24550 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24553 __extension__ static __inline void __attribute__ ((__always_inline__))
24554 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24556 __builtin_aarch64_simd_ci __o;
24557 poly16x8x3_t temp;
24558 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24559 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24560 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24561 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24562 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24563 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24564 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24567 __extension__ static __inline void __attribute__ ((__always_inline__))
24568 vst3_s32 (int32_t * __a, int32x2x3_t val)
24570 __builtin_aarch64_simd_ci __o;
24571 int32x4x3_t temp;
24572 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24573 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24574 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24575 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24576 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24577 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24578 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24581 __extension__ static __inline void __attribute__ ((__always_inline__))
24582 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24584 __builtin_aarch64_simd_ci __o;
24585 uint8x16x3_t temp;
24586 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24587 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24588 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24589 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24590 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24591 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24592 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24595 __extension__ static __inline void __attribute__ ((__always_inline__))
24596 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24598 __builtin_aarch64_simd_ci __o;
24599 uint16x8x3_t temp;
24600 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24601 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24602 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24603 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24604 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24605 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24606 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24609 __extension__ static __inline void __attribute__ ((__always_inline__))
24610 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24612 __builtin_aarch64_simd_ci __o;
24613 uint32x4x3_t temp;
24614 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24615 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24616 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24617 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24618 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24619 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24620 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24623 __extension__ static __inline void __attribute__ ((__always_inline__))
24624 vst3_f32 (float32_t * __a, float32x2x3_t val)
24626 __builtin_aarch64_simd_ci __o;
24627 float32x4x3_t temp;
24628 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24629 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24630 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24631 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24632 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24633 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24634 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24637 __extension__ static __inline void __attribute__ ((__always_inline__))
24638 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24640 __builtin_aarch64_simd_ci __o;
24641 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24642 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24643 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24644 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24647 __extension__ static __inline void __attribute__ ((__always_inline__))
24648 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24650 __builtin_aarch64_simd_ci __o;
24651 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24652 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24653 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24654 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24657 __extension__ static __inline void __attribute__ ((__always_inline__))
24658 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24660 __builtin_aarch64_simd_ci __o;
24661 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24662 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24663 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24664 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24667 __extension__ static __inline void __attribute__ ((__always_inline__))
24668 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24670 __builtin_aarch64_simd_ci __o;
24671 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24672 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24673 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24674 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24677 __extension__ static __inline void __attribute__ ((__always_inline__))
24678 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24680 __builtin_aarch64_simd_ci __o;
24681 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24682 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24683 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24684 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24687 __extension__ static __inline void __attribute__ ((__always_inline__))
24688 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24690 __builtin_aarch64_simd_ci __o;
24691 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24692 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24693 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24694 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24697 __extension__ static __inline void __attribute__ ((__always_inline__))
24698 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24700 __builtin_aarch64_simd_ci __o;
24701 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24702 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24703 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24704 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24707 __extension__ static __inline void __attribute__ ((__always_inline__))
24708 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24710 __builtin_aarch64_simd_ci __o;
24711 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24712 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24713 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24714 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24717 __extension__ static __inline void __attribute__ ((__always_inline__))
24718 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24720 __builtin_aarch64_simd_ci __o;
24721 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24722 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24723 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24724 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24727 __extension__ static __inline void __attribute__ ((__always_inline__))
24728 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24730 __builtin_aarch64_simd_ci __o;
24731 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24732 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24733 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24734 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24737 __extension__ static __inline void __attribute__ ((__always_inline__))
24738 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24740 __builtin_aarch64_simd_ci __o;
24741 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24742 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24743 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24744 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24747 __extension__ static __inline void __attribute__ ((__always_inline__))
24748 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24750 __builtin_aarch64_simd_ci __o;
24751 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24752 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24753 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24754 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24757 __extension__ static __inline void
24758 vst4_s64 (int64_t * __a, int64x1x4_t val)
24760 __builtin_aarch64_simd_xi __o;
24761 int64x2x4_t temp;
24762 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24763 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24764 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24765 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
24766 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24767 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24768 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24769 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24770 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24773 __extension__ static __inline void
24774 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24776 __builtin_aarch64_simd_xi __o;
24777 uint64x2x4_t temp;
24778 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24779 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24780 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24781 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
24782 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24783 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24784 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24785 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24786 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24789 __extension__ static __inline void
24790 vst4_f64 (float64_t * __a, float64x1x4_t val)
24792 __builtin_aarch64_simd_xi __o;
24793 float64x2x4_t temp;
24794 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24795 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24796 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24797 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
24798 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24799 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24800 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24801 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24802 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24805 __extension__ static __inline void
24806 vst4_s8 (int8_t * __a, int8x8x4_t val)
24808 __builtin_aarch64_simd_xi __o;
24809 int8x16x4_t temp;
24810 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24811 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24812 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24813 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
24814 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24815 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24816 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24817 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24818 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24821 __extension__ static __inline void __attribute__ ((__always_inline__))
24822 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24824 __builtin_aarch64_simd_xi __o;
24825 poly8x16x4_t temp;
24826 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24827 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24828 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24829 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
24830 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24831 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24832 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24833 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24834 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24837 __extension__ static __inline void __attribute__ ((__always_inline__))
24838 vst4_s16 (int16_t * __a, int16x4x4_t val)
24840 __builtin_aarch64_simd_xi __o;
24841 int16x8x4_t temp;
24842 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24843 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24844 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24845 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
24846 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24847 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24848 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24849 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24850 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24853 __extension__ static __inline void __attribute__ ((__always_inline__))
24854 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
24856 __builtin_aarch64_simd_xi __o;
24857 poly16x8x4_t temp;
24858 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24859 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24860 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24861 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
24862 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24863 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24864 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24865 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24866 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24869 __extension__ static __inline void __attribute__ ((__always_inline__))
24870 vst4_s32 (int32_t * __a, int32x2x4_t val)
24872 __builtin_aarch64_simd_xi __o;
24873 int32x4x4_t temp;
24874 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24875 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24876 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24877 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
24878 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24879 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24880 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24881 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24882 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24885 __extension__ static __inline void __attribute__ ((__always_inline__))
24886 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
24888 __builtin_aarch64_simd_xi __o;
24889 uint8x16x4_t temp;
24890 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24891 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24892 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24893 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
24894 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24895 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24896 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24897 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24898 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24901 __extension__ static __inline void __attribute__ ((__always_inline__))
24902 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
24904 __builtin_aarch64_simd_xi __o;
24905 uint16x8x4_t temp;
24906 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24907 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24908 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24909 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
24910 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24911 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24912 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24913 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24914 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24917 __extension__ static __inline void __attribute__ ((__always_inline__))
24918 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
24920 __builtin_aarch64_simd_xi __o;
24921 uint32x4x4_t temp;
24922 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24923 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24924 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24925 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
24926 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24927 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24928 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24929 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24930 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24933 __extension__ static __inline void __attribute__ ((__always_inline__))
24934 vst4_f32 (float32_t * __a, float32x2x4_t val)
24936 __builtin_aarch64_simd_xi __o;
24937 float32x4x4_t temp;
24938 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24939 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24940 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24941 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
24942 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
24943 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
24944 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
24945 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
24946 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24949 __extension__ static __inline void __attribute__ ((__always_inline__))
24950 vst4q_s8 (int8_t * __a, int8x16x4_t val)
24952 __builtin_aarch64_simd_xi __o;
24953 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24954 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24955 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24956 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24957 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24960 __extension__ static __inline void __attribute__ ((__always_inline__))
24961 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
24963 __builtin_aarch64_simd_xi __o;
24964 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24965 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24966 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24967 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24968 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24971 __extension__ static __inline void __attribute__ ((__always_inline__))
24972 vst4q_s16 (int16_t * __a, int16x8x4_t val)
24974 __builtin_aarch64_simd_xi __o;
24975 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24976 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24977 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24978 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24979 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24982 __extension__ static __inline void __attribute__ ((__always_inline__))
24983 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
24985 __builtin_aarch64_simd_xi __o;
24986 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24987 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24988 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24989 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24990 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24993 __extension__ static __inline void __attribute__ ((__always_inline__))
24994 vst4q_s32 (int32_t * __a, int32x4x4_t val)
24996 __builtin_aarch64_simd_xi __o;
24997 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24998 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24999 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25000 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25001 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25004 __extension__ static __inline void __attribute__ ((__always_inline__))
25005 vst4q_s64 (int64_t * __a, int64x2x4_t val)
25007 __builtin_aarch64_simd_xi __o;
25008 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25009 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25010 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25011 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25012 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25015 __extension__ static __inline void __attribute__ ((__always_inline__))
25016 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
25018 __builtin_aarch64_simd_xi __o;
25019 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25020 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25021 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25022 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25023 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25026 __extension__ static __inline void __attribute__ ((__always_inline__))
25027 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
25029 __builtin_aarch64_simd_xi __o;
25030 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25031 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25032 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25033 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25034 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25037 __extension__ static __inline void __attribute__ ((__always_inline__))
25038 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
25040 __builtin_aarch64_simd_xi __o;
25041 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25042 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25043 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25044 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25045 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25048 __extension__ static __inline void __attribute__ ((__always_inline__))
25049 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
25051 __builtin_aarch64_simd_xi __o;
25052 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25053 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25054 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25055 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25056 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25059 __extension__ static __inline void __attribute__ ((__always_inline__))
25060 vst4q_f32 (float32_t * __a, float32x4x4_t val)
25062 __builtin_aarch64_simd_xi __o;
25063 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
25064 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
25065 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
25066 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
25067 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
25070 __extension__ static __inline void __attribute__ ((__always_inline__))
25071 vst4q_f64 (float64_t * __a, float64x2x4_t val)
25073 __builtin_aarch64_simd_xi __o;
25074 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
25075 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
25076 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
25077 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
25078 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
25081 /* vsub */
25083 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25084 vsubd_s64 (int64x1_t __a, int64x1_t __b)
25086 return __a - __b;
25089 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25090 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
25092 return __a - __b;
25095 /* vtrn */
25097 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
25098 vtrn_f32 (float32x2_t a, float32x2_t b)
25100 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
25103 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
25104 vtrn_p8 (poly8x8_t a, poly8x8_t b)
25106 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
25109 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
25110 vtrn_p16 (poly16x4_t a, poly16x4_t b)
25112 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
25115 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
25116 vtrn_s8 (int8x8_t a, int8x8_t b)
25118 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25121 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
25122 vtrn_s16 (int16x4_t a, int16x4_t b)
25124 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25127 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
25128 vtrn_s32 (int32x2_t a, int32x2_t b)
25130 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25133 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
25134 vtrn_u8 (uint8x8_t a, uint8x8_t b)
25136 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25139 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
25140 vtrn_u16 (uint16x4_t a, uint16x4_t b)
25142 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25145 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
25146 vtrn_u32 (uint32x2_t a, uint32x2_t b)
25148 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25151 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
25152 vtrnq_f32 (float32x4_t a, float32x4_t b)
25154 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25157 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
25158 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25160 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25163 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
25164 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25166 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25169 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
25170 vtrnq_s8 (int8x16_t a, int8x16_t b)
25172 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25175 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
25176 vtrnq_s16 (int16x8_t a, int16x8_t b)
25178 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25181 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25182 vtrnq_s32 (int32x4_t a, int32x4_t b)
25184 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25187 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25188 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25190 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25193 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25194 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25196 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25199 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25200 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25202 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25205 /* vtst */
25207 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25208 vtst_s8 (int8x8_t __a, int8x8_t __b)
25210 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25213 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25214 vtst_s16 (int16x4_t __a, int16x4_t __b)
25216 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25219 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25220 vtst_s32 (int32x2_t __a, int32x2_t __b)
25222 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25225 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25226 vtst_s64 (int64x1_t __a, int64x1_t __b)
25228 return (__a & __b) ? -1ll : 0ll;
25231 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25232 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25234 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25235 (int8x8_t) __b);
25238 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25239 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25241 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25242 (int16x4_t) __b);
25245 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25246 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25248 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25249 (int32x2_t) __b);
25252 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25253 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25255 return (__a & __b) ? -1ll : 0ll;
25258 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25259 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25261 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25264 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25265 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25267 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25270 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25271 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25273 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25276 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25277 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25279 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25282 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25283 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25285 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25286 (int8x16_t) __b);
25289 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25290 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25292 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25293 (int16x8_t) __b);
25296 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25297 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25299 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25300 (int32x4_t) __b);
25303 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25304 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25306 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25307 (int64x2_t) __b);
25310 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25311 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25313 return (__a & __b) ? -1ll : 0ll;
25316 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25317 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25319 return (__a & __b) ? -1ll : 0ll;
25322 /* vuqadd */
25324 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25325 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25327 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25330 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25331 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25333 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25336 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25337 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25339 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25342 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25343 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25345 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25348 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25349 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25351 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25354 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25355 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25357 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25360 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25361 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25363 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25366 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25367 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25369 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25372 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
25373 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25375 return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25378 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
25379 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25381 return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25384 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
25385 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25387 return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25390 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25391 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25393 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25396 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
25397 __extension__ static __inline rettype \
25398 __attribute__ ((__always_inline__)) \
25399 v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
25401 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
25402 v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
25405 #define __INTERLEAVE_LIST(op) \
25406 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
25407 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
25408 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
25409 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
25410 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
25411 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
25412 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
25413 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
25414 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
25415 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
25416 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
25417 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
25418 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
25419 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
25420 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
25421 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
25422 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
25423 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25425 /* vuzp */
25427 __INTERLEAVE_LIST (uzp)
25429 /* vzip */
25431 __INTERLEAVE_LIST (zip)
25433 #undef __INTERLEAVE_LIST
25434 #undef __DEFINTERLEAVE
25436 /* End of optimal implementations in approved order. */
25438 #undef __aarch64_vget_lane_any
25439 #undef __aarch64_vget_lane_f32
25440 #undef __aarch64_vget_lane_f64
25441 #undef __aarch64_vget_lane_p8
25442 #undef __aarch64_vget_lane_p16
25443 #undef __aarch64_vget_lane_s8
25444 #undef __aarch64_vget_lane_s16
25445 #undef __aarch64_vget_lane_s32
25446 #undef __aarch64_vget_lane_s64
25447 #undef __aarch64_vget_lane_u8
25448 #undef __aarch64_vget_lane_u16
25449 #undef __aarch64_vget_lane_u32
25450 #undef __aarch64_vget_lane_u64
25452 #undef __aarch64_vgetq_lane_f32
25453 #undef __aarch64_vgetq_lane_f64
25454 #undef __aarch64_vgetq_lane_p8
25455 #undef __aarch64_vgetq_lane_p16
25456 #undef __aarch64_vgetq_lane_s8
25457 #undef __aarch64_vgetq_lane_s16
25458 #undef __aarch64_vgetq_lane_s32
25459 #undef __aarch64_vgetq_lane_s64
25460 #undef __aarch64_vgetq_lane_u8
25461 #undef __aarch64_vgetq_lane_u16
25462 #undef __aarch64_vgetq_lane_u32
25463 #undef __aarch64_vgetq_lane_u64
25465 #undef __aarch64_vdup_lane_any
25466 #undef __aarch64_vdup_lane_f32
25467 #undef __aarch64_vdup_lane_f64
25468 #undef __aarch64_vdup_lane_p8
25469 #undef __aarch64_vdup_lane_p16
25470 #undef __aarch64_vdup_lane_s8
25471 #undef __aarch64_vdup_lane_s16
25472 #undef __aarch64_vdup_lane_s32
25473 #undef __aarch64_vdup_lane_s64
25474 #undef __aarch64_vdup_lane_u8
25475 #undef __aarch64_vdup_lane_u16
25476 #undef __aarch64_vdup_lane_u32
25477 #undef __aarch64_vdup_lane_u64
25478 #undef __aarch64_vdup_laneq_f32
25479 #undef __aarch64_vdup_laneq_f64
25480 #undef __aarch64_vdup_laneq_p8
25481 #undef __aarch64_vdup_laneq_p16
25482 #undef __aarch64_vdup_laneq_s8
25483 #undef __aarch64_vdup_laneq_s16
25484 #undef __aarch64_vdup_laneq_s32
25485 #undef __aarch64_vdup_laneq_s64
25486 #undef __aarch64_vdup_laneq_u8
25487 #undef __aarch64_vdup_laneq_u16
25488 #undef __aarch64_vdup_laneq_u32
25489 #undef __aarch64_vdup_laneq_u64
25490 #undef __aarch64_vdupq_lane_f32
25491 #undef __aarch64_vdupq_lane_f64
25492 #undef __aarch64_vdupq_lane_p8
25493 #undef __aarch64_vdupq_lane_p16
25494 #undef __aarch64_vdupq_lane_s8
25495 #undef __aarch64_vdupq_lane_s16
25496 #undef __aarch64_vdupq_lane_s32
25497 #undef __aarch64_vdupq_lane_s64
25498 #undef __aarch64_vdupq_lane_u8
25499 #undef __aarch64_vdupq_lane_u16
25500 #undef __aarch64_vdupq_lane_u32
25501 #undef __aarch64_vdupq_lane_u64
25502 #undef __aarch64_vdupq_laneq_f32
25503 #undef __aarch64_vdupq_laneq_f64
25504 #undef __aarch64_vdupq_laneq_p8
25505 #undef __aarch64_vdupq_laneq_p16
25506 #undef __aarch64_vdupq_laneq_s8
25507 #undef __aarch64_vdupq_laneq_s16
25508 #undef __aarch64_vdupq_laneq_s32
25509 #undef __aarch64_vdupq_laneq_s64
25510 #undef __aarch64_vdupq_laneq_u8
25511 #undef __aarch64_vdupq_laneq_u16
25512 #undef __aarch64_vdupq_laneq_u32
25513 #undef __aarch64_vdupq_laneq_u64
25515 #endif