/* [PATCH 2/2] [AARCH64,NEON] Convert arm_neon.h to use new builtins for vld[234](q...)
   Source: official-gcc.git / gcc / config / aarch64 / arm_neon.h
   blob 9eb04c4881a749e7e357f7731224fa482a249239  */
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef __builtin_aarch64_simd_di int64x1_t
42 __attribute__ ((__vector_size__ (8)));
43 typedef __builtin_aarch64_simd_df float64x1_t
44 __attribute__ ((__vector_size__ (8)));
45 typedef __builtin_aarch64_simd_sf float32x2_t
46 __attribute__ ((__vector_size__ (8)));
47 typedef __builtin_aarch64_simd_poly8 poly8x8_t
48 __attribute__ ((__vector_size__ (8)));
49 typedef __builtin_aarch64_simd_poly16 poly16x4_t
50 __attribute__ ((__vector_size__ (8)));
51 typedef __builtin_aarch64_simd_uqi uint8x8_t
52 __attribute__ ((__vector_size__ (8)));
53 typedef __builtin_aarch64_simd_uhi uint16x4_t
54 __attribute__ ((__vector_size__ (8)));
55 typedef __builtin_aarch64_simd_usi uint32x2_t
56 __attribute__ ((__vector_size__ (8)));
57 typedef __builtin_aarch64_simd_udi uint64x1_t
58 __attribute__ ((__vector_size__ (8)));
59 typedef __builtin_aarch64_simd_qi int8x16_t
60 __attribute__ ((__vector_size__ (16)));
61 typedef __builtin_aarch64_simd_hi int16x8_t
62 __attribute__ ((__vector_size__ (16)));
63 typedef __builtin_aarch64_simd_si int32x4_t
64 __attribute__ ((__vector_size__ (16)));
65 typedef __builtin_aarch64_simd_di int64x2_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_sf float32x4_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_df float64x2_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_poly8 poly8x16_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_poly16 poly16x8_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_poly64 poly64x2_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_uqi uint8x16_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_uhi uint16x8_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_usi uint32x4_t
82 __attribute__ ((__vector_size__ (16)));
83 typedef __builtin_aarch64_simd_udi uint64x2_t
84 __attribute__ ((__vector_size__ (16)));
86 typedef float float32_t;
87 typedef double float64_t;
88 typedef __builtin_aarch64_simd_poly8 poly8_t;
89 typedef __builtin_aarch64_simd_poly16 poly16_t;
90 typedef __builtin_aarch64_simd_poly64 poly64_t;
91 typedef __builtin_aarch64_simd_poly128 poly128_t;
93 typedef struct int8x8x2_t
95 int8x8_t val[2];
96 } int8x8x2_t;
98 typedef struct int8x16x2_t
100 int8x16_t val[2];
101 } int8x16x2_t;
103 typedef struct int16x4x2_t
105 int16x4_t val[2];
106 } int16x4x2_t;
108 typedef struct int16x8x2_t
110 int16x8_t val[2];
111 } int16x8x2_t;
113 typedef struct int32x2x2_t
115 int32x2_t val[2];
116 } int32x2x2_t;
118 typedef struct int32x4x2_t
120 int32x4_t val[2];
121 } int32x4x2_t;
123 typedef struct int64x1x2_t
125 int64x1_t val[2];
126 } int64x1x2_t;
128 typedef struct int64x2x2_t
130 int64x2_t val[2];
131 } int64x2x2_t;
133 typedef struct uint8x8x2_t
135 uint8x8_t val[2];
136 } uint8x8x2_t;
138 typedef struct uint8x16x2_t
140 uint8x16_t val[2];
141 } uint8x16x2_t;
143 typedef struct uint16x4x2_t
145 uint16x4_t val[2];
146 } uint16x4x2_t;
148 typedef struct uint16x8x2_t
150 uint16x8_t val[2];
151 } uint16x8x2_t;
153 typedef struct uint32x2x2_t
155 uint32x2_t val[2];
156 } uint32x2x2_t;
158 typedef struct uint32x4x2_t
160 uint32x4_t val[2];
161 } uint32x4x2_t;
163 typedef struct uint64x1x2_t
165 uint64x1_t val[2];
166 } uint64x1x2_t;
168 typedef struct uint64x2x2_t
170 uint64x2_t val[2];
171 } uint64x2x2_t;
173 typedef struct float32x2x2_t
175 float32x2_t val[2];
176 } float32x2x2_t;
178 typedef struct float32x4x2_t
180 float32x4_t val[2];
181 } float32x4x2_t;
183 typedef struct float64x2x2_t
185 float64x2_t val[2];
186 } float64x2x2_t;
188 typedef struct float64x1x2_t
190 float64x1_t val[2];
191 } float64x1x2_t;
193 typedef struct poly8x8x2_t
195 poly8x8_t val[2];
196 } poly8x8x2_t;
198 typedef struct poly8x16x2_t
200 poly8x16_t val[2];
201 } poly8x16x2_t;
203 typedef struct poly16x4x2_t
205 poly16x4_t val[2];
206 } poly16x4x2_t;
208 typedef struct poly16x8x2_t
210 poly16x8_t val[2];
211 } poly16x8x2_t;
213 typedef struct int8x8x3_t
215 int8x8_t val[3];
216 } int8x8x3_t;
218 typedef struct int8x16x3_t
220 int8x16_t val[3];
221 } int8x16x3_t;
223 typedef struct int16x4x3_t
225 int16x4_t val[3];
226 } int16x4x3_t;
228 typedef struct int16x8x3_t
230 int16x8_t val[3];
231 } int16x8x3_t;
233 typedef struct int32x2x3_t
235 int32x2_t val[3];
236 } int32x2x3_t;
238 typedef struct int32x4x3_t
240 int32x4_t val[3];
241 } int32x4x3_t;
243 typedef struct int64x1x3_t
245 int64x1_t val[3];
246 } int64x1x3_t;
248 typedef struct int64x2x3_t
250 int64x2_t val[3];
251 } int64x2x3_t;
253 typedef struct uint8x8x3_t
255 uint8x8_t val[3];
256 } uint8x8x3_t;
258 typedef struct uint8x16x3_t
260 uint8x16_t val[3];
261 } uint8x16x3_t;
263 typedef struct uint16x4x3_t
265 uint16x4_t val[3];
266 } uint16x4x3_t;
268 typedef struct uint16x8x3_t
270 uint16x8_t val[3];
271 } uint16x8x3_t;
273 typedef struct uint32x2x3_t
275 uint32x2_t val[3];
276 } uint32x2x3_t;
278 typedef struct uint32x4x3_t
280 uint32x4_t val[3];
281 } uint32x4x3_t;
283 typedef struct uint64x1x3_t
285 uint64x1_t val[3];
286 } uint64x1x3_t;
288 typedef struct uint64x2x3_t
290 uint64x2_t val[3];
291 } uint64x2x3_t;
293 typedef struct float32x2x3_t
295 float32x2_t val[3];
296 } float32x2x3_t;
298 typedef struct float32x4x3_t
300 float32x4_t val[3];
301 } float32x4x3_t;
303 typedef struct float64x2x3_t
305 float64x2_t val[3];
306 } float64x2x3_t;
308 typedef struct float64x1x3_t
310 float64x1_t val[3];
311 } float64x1x3_t;
313 typedef struct poly8x8x3_t
315 poly8x8_t val[3];
316 } poly8x8x3_t;
318 typedef struct poly8x16x3_t
320 poly8x16_t val[3];
321 } poly8x16x3_t;
323 typedef struct poly16x4x3_t
325 poly16x4_t val[3];
326 } poly16x4x3_t;
328 typedef struct poly16x8x3_t
330 poly16x8_t val[3];
331 } poly16x8x3_t;
333 typedef struct int8x8x4_t
335 int8x8_t val[4];
336 } int8x8x4_t;
338 typedef struct int8x16x4_t
340 int8x16_t val[4];
341 } int8x16x4_t;
343 typedef struct int16x4x4_t
345 int16x4_t val[4];
346 } int16x4x4_t;
348 typedef struct int16x8x4_t
350 int16x8_t val[4];
351 } int16x8x4_t;
353 typedef struct int32x2x4_t
355 int32x2_t val[4];
356 } int32x2x4_t;
358 typedef struct int32x4x4_t
360 int32x4_t val[4];
361 } int32x4x4_t;
363 typedef struct int64x1x4_t
365 int64x1_t val[4];
366 } int64x1x4_t;
368 typedef struct int64x2x4_t
370 int64x2_t val[4];
371 } int64x2x4_t;
373 typedef struct uint8x8x4_t
375 uint8x8_t val[4];
376 } uint8x8x4_t;
378 typedef struct uint8x16x4_t
380 uint8x16_t val[4];
381 } uint8x16x4_t;
383 typedef struct uint16x4x4_t
385 uint16x4_t val[4];
386 } uint16x4x4_t;
388 typedef struct uint16x8x4_t
390 uint16x8_t val[4];
391 } uint16x8x4_t;
393 typedef struct uint32x2x4_t
395 uint32x2_t val[4];
396 } uint32x2x4_t;
398 typedef struct uint32x4x4_t
400 uint32x4_t val[4];
401 } uint32x4x4_t;
403 typedef struct uint64x1x4_t
405 uint64x1_t val[4];
406 } uint64x1x4_t;
408 typedef struct uint64x2x4_t
410 uint64x2_t val[4];
411 } uint64x2x4_t;
413 typedef struct float32x2x4_t
415 float32x2_t val[4];
416 } float32x2x4_t;
418 typedef struct float32x4x4_t
420 float32x4_t val[4];
421 } float32x4x4_t;
423 typedef struct float64x2x4_t
425 float64x2_t val[4];
426 } float64x2x4_t;
428 typedef struct float64x1x4_t
430 float64x1_t val[4];
431 } float64x1x4_t;
433 typedef struct poly8x8x4_t
435 poly8x8_t val[4];
436 } poly8x8x4_t;
438 typedef struct poly8x16x4_t
440 poly8x16_t val[4];
441 } poly8x16x4_t;
443 typedef struct poly16x4x4_t
445 poly16x4_t val[4];
446 } poly16x4x4_t;
448 typedef struct poly16x8x4_t
450 poly16x8_t val[4];
451 } poly16x8x4_t;
/* vget_lane internal macros.  The "be_checked" builtin both validates the
   lane index and maps it for big-endian lane numbering.  The 1-element
   (f64/s64/u64) variants only need the bounds check, then read lane 0
   directly via a statement expression.  */

#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret \
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
#define __aarch64_vget_lane_f64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

/* 128-bit (Q-register) variants.  */
#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros.  A lane dup is expressed as a
   lane extract (vget[q]_lane) followed by a broadcast (vdup[q]_n); __q1
   selects the destination width, __q2 the source width.  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , , __a, __b)

/* __aarch64_vdup_laneq internal macros: 64-bit result from a Q source.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , q, __a, __b)

/* __aarch64_vdupq_lane internal macros: Q result from a 64-bit source.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, , __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, , __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, , __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, , __a, __b)

/* __aarch64_vdupq_laneq internal macros: Q result from a Q source.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, q, __a, __b)
/* vset_lane internal macro.  Bounds-checks the lane index, stores the
   element, and yields the updated vector via a statement expression.  */

#ifdef __AARCH64EB__
/* For big-endian, GCC's vector indices are the opposite way around
   to the architectural lane indices used by Neon intrinsics.  */
#define __aarch64_vset_lane_any(__vec, __index, __val, __lanes) \
  __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__index, __lanes); \
    __vec[__lanes - 1 - __index] = __val; \
    __vec; \
  })
#else
#define __aarch64_vset_lane_any(__vec, __index, __val, __lanes) \
  __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__index, __lanes); \
    __vec[__index] = __val; \
    __vec; \
  })
#endif
653 /* vset_lane */
655 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
656 vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
658 return __aarch64_vset_lane_any (__vec, __index, __elem, 2);
661 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
662 vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index)
664 return __aarch64_vset_lane_any (__vec, __index, __elem, 1);
667 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
668 vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index)
670 return __aarch64_vset_lane_any (__vec, __index, __elem, 8);
673 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
674 vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
676 return __aarch64_vset_lane_any (__vec, __index, __elem, 4);
679 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
680 vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
682 return __aarch64_vset_lane_any (__vec, __index, __elem, 8);
685 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
686 vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index)
688 return __aarch64_vset_lane_any (__vec, __index, __elem, 4);
691 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
692 vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index)
694 return __aarch64_vset_lane_any (__vec, __index, __elem, 2);
697 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
698 vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index)
700 return __aarch64_vset_lane_any (__vec, __index, __elem, 1);
703 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
704 vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index)
706 return __aarch64_vset_lane_any (__vec, __index, __elem, 8);
709 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
710 vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index)
712 return __aarch64_vset_lane_any (__vec, __index, __elem, 4);
715 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
716 vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index)
718 return __aarch64_vset_lane_any (__vec, __index, __elem, 2);
721 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
722 vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
724 return __aarch64_vset_lane_any (__vec, __index, __elem, 1);
727 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
728 vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
730 return __aarch64_vset_lane_any (__vec, __index, __elem, 4);
733 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
734 vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index)
736 return __aarch64_vset_lane_any (__vec, __index, __elem, 2);
739 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
740 vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index)
742 return __aarch64_vset_lane_any (__vec, __index, __elem, 16);
745 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
746 vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
748 return __aarch64_vset_lane_any (__vec, __index, __elem, 8);
751 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
752 vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
754 return __aarch64_vset_lane_any (__vec, __index, __elem, 16);
757 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
758 vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index)
760 return __aarch64_vset_lane_any (__vec, __index, __elem, 8);
763 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
764 vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index)
766 return __aarch64_vset_lane_any (__vec, __index, __elem, 4);
769 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
770 vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index)
772 return __aarch64_vset_lane_any (__vec, __index, __elem, 2);
775 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
776 vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index)
778 return __aarch64_vset_lane_any (__vec, __index, __elem, 16);
781 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
782 vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index)
784 return __aarch64_vset_lane_any (__vec, __index, __elem, 8);
787 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
788 vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index)
790 return __aarch64_vset_lane_any (__vec, __index, __elem, 4);
793 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
794 vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
796 return __aarch64_vset_lane_any (__vec, __index, __elem, 2);
799 /* vadd */
800 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
801 vadd_s8 (int8x8_t __a, int8x8_t __b)
803 return __a + __b;
806 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
807 vadd_s16 (int16x4_t __a, int16x4_t __b)
809 return __a + __b;
812 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
813 vadd_s32 (int32x2_t __a, int32x2_t __b)
815 return __a + __b;
818 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
819 vadd_f32 (float32x2_t __a, float32x2_t __b)
821 return __a + __b;
824 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
825 vadd_f64 (float64x1_t __a, float64x1_t __b)
827 return __a + __b;
830 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
831 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
833 return __a + __b;
836 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
837 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
839 return __a + __b;
842 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
843 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
845 return __a + __b;
848 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
849 vadd_s64 (int64x1_t __a, int64x1_t __b)
851 return __a + __b;
854 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
855 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
857 return __a + __b;
860 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
861 vaddq_s8 (int8x16_t __a, int8x16_t __b)
863 return __a + __b;
866 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
867 vaddq_s16 (int16x8_t __a, int16x8_t __b)
869 return __a + __b;
872 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
873 vaddq_s32 (int32x4_t __a, int32x4_t __b)
875 return __a + __b;
878 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
879 vaddq_s64 (int64x2_t __a, int64x2_t __b)
881 return __a + __b;
884 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
885 vaddq_f32 (float32x4_t __a, float32x4_t __b)
887 return __a + __b;
890 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
891 vaddq_f64 (float64x2_t __a, float64x2_t __b)
893 return __a + __b;
896 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
897 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
899 return __a + __b;
902 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
903 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
905 return __a + __b;
908 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
909 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
911 return __a + __b;
914 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
915 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
917 return __a + __b;
920 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
921 vaddl_s8 (int8x8_t __a, int8x8_t __b)
923 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
926 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
927 vaddl_s16 (int16x4_t __a, int16x4_t __b)
929 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
932 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
933 vaddl_s32 (int32x2_t __a, int32x2_t __b)
935 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
938 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
939 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
941 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
942 (int8x8_t) __b);
945 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
946 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
948 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
949 (int16x4_t) __b);
952 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
953 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
955 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
956 (int32x2_t) __b);
959 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
960 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
962 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
965 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
966 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
968 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
971 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
972 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
974 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
977 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
978 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
980 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
981 (int8x16_t) __b);
984 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
985 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
987 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
988 (int16x8_t) __b);
991 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
992 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
994 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
995 (int32x4_t) __b);
998 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
999 vaddw_s8 (int16x8_t __a, int8x8_t __b)
1001 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
1004 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1005 vaddw_s16 (int32x4_t __a, int16x4_t __b)
1007 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
1010 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1011 vaddw_s32 (int64x2_t __a, int32x2_t __b)
1013 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
1016 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1017 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
1019 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
1020 (int8x8_t) __b);
1023 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1024 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
1026 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
1027 (int16x4_t) __b);
1030 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1031 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
1033 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
1034 (int32x2_t) __b);
1037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1038 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
1040 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
1043 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1044 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
1046 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
1049 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1050 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
1052 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
1055 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1056 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
1058 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
1059 (int8x16_t) __b);
1062 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1063 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
1065 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
1066 (int16x8_t) __b);
1069 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1070 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
1072 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
1073 (int32x4_t) __b);
1076 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1077 vhadd_s8 (int8x8_t __a, int8x8_t __b)
1079 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
1082 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1083 vhadd_s16 (int16x4_t __a, int16x4_t __b)
1085 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
1088 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1089 vhadd_s32 (int32x2_t __a, int32x2_t __b)
1091 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
1094 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1095 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
1097 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
1098 (int8x8_t) __b);
1101 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1102 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
1104 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
1105 (int16x4_t) __b);
1108 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1109 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1111 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
1112 (int32x2_t) __b);
1115 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1116 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
1118 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
1121 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1122 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
1124 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
1127 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1128 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
1130 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
1133 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1134 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1136 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
1137 (int8x16_t) __b);
1140 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1141 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1143 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
1144 (int16x8_t) __b);
1147 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1148 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1150 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
1151 (int32x4_t) __b);
1154 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1155 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
1157 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
1160 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1161 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
1163 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
1166 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1167 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
1169 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
1172 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1173 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
1175 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
1176 (int8x8_t) __b);
1179 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1180 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
1182 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1183 (int16x4_t) __b);
1186 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1187 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1189 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1190 (int32x2_t) __b);
1193 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1194 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1196 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1199 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1200 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1202 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1205 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1206 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1208 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1211 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1212 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1214 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1215 (int8x16_t) __b);
1218 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1219 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1221 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1222 (int16x8_t) __b);
1225 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1226 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1228 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1229 (int32x4_t) __b);
1232 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1233 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1235 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1238 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1239 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1241 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1244 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1245 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1247 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1250 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1251 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1253 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1254 (int16x8_t) __b);
1257 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1258 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1260 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1261 (int32x4_t) __b);
1264 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1265 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1267 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1268 (int64x2_t) __b);
1271 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1272 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1274 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1277 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1278 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1280 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1283 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1284 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1286 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1289 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1290 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1292 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1293 (int16x8_t) __b);
1296 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1297 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1299 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1300 (int32x4_t) __b);
1303 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1304 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1306 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1307 (int64x2_t) __b);
1310 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1311 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1313 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1316 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1317 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1319 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1322 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1323 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1325 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1328 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1329 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1331 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1332 (int16x8_t) __b,
1333 (int16x8_t) __c);
1336 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1337 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1339 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1340 (int32x4_t) __b,
1341 (int32x4_t) __c);
1344 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1345 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1347 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1348 (int64x2_t) __b,
1349 (int64x2_t) __c);
1352 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1353 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1355 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1358 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1359 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1361 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1364 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1365 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1367 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1370 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1371 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1373 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1374 (int16x8_t) __b,
1375 (int16x8_t) __c);
1378 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1379 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1381 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1382 (int32x4_t) __b,
1383 (int32x4_t) __c);
1386 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1387 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1389 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1390 (int64x2_t) __b,
1391 (int64x2_t) __c);
1394 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1395 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1397 return __a / __b;
1400 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1401 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1403 return __a / __b;
1406 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1407 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1409 return __a / __b;
1412 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1413 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1415 return __a / __b;
1418 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1419 vmul_s8 (int8x8_t __a, int8x8_t __b)
1421 return __a * __b;
1424 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1425 vmul_s16 (int16x4_t __a, int16x4_t __b)
1427 return __a * __b;
1430 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1431 vmul_s32 (int32x2_t __a, int32x2_t __b)
1433 return __a * __b;
1436 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1437 vmul_f32 (float32x2_t __a, float32x2_t __b)
1439 return __a * __b;
1442 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1443 vmul_f64 (float64x1_t __a, float64x1_t __b)
1445 return __a * __b;
1448 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1449 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1451 return __a * __b;
1454 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1455 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1457 return __a * __b;
1460 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1461 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1463 return __a * __b;
1466 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1467 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1469 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1470 (int8x8_t) __b);
1473 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1474 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1476 return __a * __b;
1479 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1480 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1482 return __a * __b;
1485 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1486 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1488 return __a * __b;
1491 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1492 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1494 return __a * __b;
1497 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1498 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1500 return __a * __b;
1503 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1504 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1506 return __a * __b;
1509 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1510 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1512 return __a * __b;
1515 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1516 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1518 return __a * __b;
1521 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1522 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1524 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1525 (int8x16_t) __b);
1528 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1529 vand_s8 (int8x8_t __a, int8x8_t __b)
1531 return __a & __b;
1534 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1535 vand_s16 (int16x4_t __a, int16x4_t __b)
1537 return __a & __b;
1540 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1541 vand_s32 (int32x2_t __a, int32x2_t __b)
1543 return __a & __b;
1546 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1547 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1549 return __a & __b;
1552 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1553 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1555 return __a & __b;
1558 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1559 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1561 return __a & __b;
1564 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1565 vand_s64 (int64x1_t __a, int64x1_t __b)
1567 return __a & __b;
1570 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1571 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1573 return __a & __b;
1576 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1577 vandq_s8 (int8x16_t __a, int8x16_t __b)
1579 return __a & __b;
1582 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1583 vandq_s16 (int16x8_t __a, int16x8_t __b)
1585 return __a & __b;
1588 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1589 vandq_s32 (int32x4_t __a, int32x4_t __b)
1591 return __a & __b;
1594 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1595 vandq_s64 (int64x2_t __a, int64x2_t __b)
1597 return __a & __b;
1600 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1601 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1603 return __a & __b;
1606 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1607 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1609 return __a & __b;
1612 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1613 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1615 return __a & __b;
1618 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1619 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1621 return __a & __b;
1624 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1625 vorr_s8 (int8x8_t __a, int8x8_t __b)
1627 return __a | __b;
1630 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1631 vorr_s16 (int16x4_t __a, int16x4_t __b)
1633 return __a | __b;
1636 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1637 vorr_s32 (int32x2_t __a, int32x2_t __b)
1639 return __a | __b;
1642 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1643 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1645 return __a | __b;
1648 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1649 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1651 return __a | __b;
1654 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1655 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1657 return __a | __b;
1660 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1661 vorr_s64 (int64x1_t __a, int64x1_t __b)
1663 return __a | __b;
1666 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1667 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1669 return __a | __b;
1672 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1673 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1675 return __a | __b;
1678 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1679 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1681 return __a | __b;
1684 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1685 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1687 return __a | __b;
1690 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1691 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1693 return __a | __b;
1696 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1697 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1699 return __a | __b;
1702 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1703 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1705 return __a | __b;
1708 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1709 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1711 return __a | __b;
1714 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1715 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1717 return __a | __b;
1720 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1721 veor_s8 (int8x8_t __a, int8x8_t __b)
1723 return __a ^ __b;
1726 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1727 veor_s16 (int16x4_t __a, int16x4_t __b)
1729 return __a ^ __b;
1732 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1733 veor_s32 (int32x2_t __a, int32x2_t __b)
1735 return __a ^ __b;
1738 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1739 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1741 return __a ^ __b;
1744 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1745 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1747 return __a ^ __b;
1750 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1751 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1753 return __a ^ __b;
1756 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1757 veor_s64 (int64x1_t __a, int64x1_t __b)
1759 return __a ^ __b;
1762 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1763 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1765 return __a ^ __b;
1768 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1769 veorq_s8 (int8x16_t __a, int8x16_t __b)
1771 return __a ^ __b;
1774 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1775 veorq_s16 (int16x8_t __a, int16x8_t __b)
1777 return __a ^ __b;
1780 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1781 veorq_s32 (int32x4_t __a, int32x4_t __b)
1783 return __a ^ __b;
1786 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1787 veorq_s64 (int64x2_t __a, int64x2_t __b)
1789 return __a ^ __b;
1792 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1793 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1795 return __a ^ __b;
1798 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1799 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1801 return __a ^ __b;
1804 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1805 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1807 return __a ^ __b;
1810 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1811 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1813 return __a ^ __b;
1816 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1817 vbic_s8 (int8x8_t __a, int8x8_t __b)
1819 return __a & ~__b;
1822 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1823 vbic_s16 (int16x4_t __a, int16x4_t __b)
1825 return __a & ~__b;
1828 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1829 vbic_s32 (int32x2_t __a, int32x2_t __b)
1831 return __a & ~__b;
1834 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1835 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1837 return __a & ~__b;
1840 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1841 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1843 return __a & ~__b;
1846 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1847 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1849 return __a & ~__b;
1852 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1853 vbic_s64 (int64x1_t __a, int64x1_t __b)
1855 return __a & ~__b;
1858 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1859 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1861 return __a & ~__b;
1864 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1865 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1867 return __a & ~__b;
1870 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1871 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1873 return __a & ~__b;
1876 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1877 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1879 return __a & ~__b;
1882 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1883 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1885 return __a & ~__b;
1888 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1889 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1891 return __a & ~__b;
1894 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1895 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1897 return __a & ~__b;
1900 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1901 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1903 return __a & ~__b;
1906 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1907 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1909 return __a & ~__b;
1912 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1913 vorn_s8 (int8x8_t __a, int8x8_t __b)
1915 return __a | ~__b;
1918 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1919 vorn_s16 (int16x4_t __a, int16x4_t __b)
1921 return __a | ~__b;
1924 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1925 vorn_s32 (int32x2_t __a, int32x2_t __b)
1927 return __a | ~__b;
1930 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1931 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1933 return __a | ~__b;
1936 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1937 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1939 return __a | ~__b;
1942 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1943 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1945 return __a | ~__b;
1948 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1949 vorn_s64 (int64x1_t __a, int64x1_t __b)
1951 return __a | ~__b;
1954 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1955 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1957 return __a | ~__b;
1960 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1961 vornq_s8 (int8x16_t __a, int8x16_t __b)
1963 return __a | ~__b;
1966 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1967 vornq_s16 (int16x8_t __a, int16x8_t __b)
1969 return __a | ~__b;
1972 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1973 vornq_s32 (int32x4_t __a, int32x4_t __b)
1975 return __a | ~__b;
1978 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1979 vornq_s64 (int64x2_t __a, int64x2_t __b)
1981 return __a | ~__b;
1984 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1985 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1987 return __a | ~__b;
1990 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1991 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1993 return __a | ~__b;
1996 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1997 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1999 return __a | ~__b;
2002 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2003 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
2005 return __a | ~__b;
2008 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2009 vsub_s8 (int8x8_t __a, int8x8_t __b)
2011 return __a - __b;
2014 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2015 vsub_s16 (int16x4_t __a, int16x4_t __b)
2017 return __a - __b;
2020 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2021 vsub_s32 (int32x2_t __a, int32x2_t __b)
2023 return __a - __b;
2026 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2027 vsub_f32 (float32x2_t __a, float32x2_t __b)
2029 return __a - __b;
2032 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2033 vsub_f64 (float64x1_t __a, float64x1_t __b)
2035 return __a - __b;
2038 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2039 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
2041 return __a - __b;
2044 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2045 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
2047 return __a - __b;
2050 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2051 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
2053 return __a - __b;
2056 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2057 vsub_s64 (int64x1_t __a, int64x1_t __b)
2059 return __a - __b;
2062 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2063 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
2065 return __a - __b;
2068 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2069 vsubq_s8 (int8x16_t __a, int8x16_t __b)
2071 return __a - __b;
2074 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2075 vsubq_s16 (int16x8_t __a, int16x8_t __b)
2077 return __a - __b;
2080 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2081 vsubq_s32 (int32x4_t __a, int32x4_t __b)
2083 return __a - __b;
2086 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2087 vsubq_s64 (int64x2_t __a, int64x2_t __b)
2089 return __a - __b;
2092 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2093 vsubq_f32 (float32x4_t __a, float32x4_t __b)
2095 return __a - __b;
2098 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
2099 vsubq_f64 (float64x2_t __a, float64x2_t __b)
2101 return __a - __b;
2104 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2105 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2107 return __a - __b;
2110 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2111 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2113 return __a - __b;
2116 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2117 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2119 return __a - __b;
2122 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2123 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2125 return __a - __b;
2128 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2129 vsubl_s8 (int8x8_t __a, int8x8_t __b)
2131 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
2134 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2135 vsubl_s16 (int16x4_t __a, int16x4_t __b)
2137 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
2140 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2141 vsubl_s32 (int32x2_t __a, int32x2_t __b)
2143 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
2146 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2147 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
2149 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
2150 (int8x8_t) __b);
2153 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2154 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
2156 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
2157 (int16x4_t) __b);
2160 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2161 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
2163 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
2164 (int32x2_t) __b);
2167 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2168 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
2170 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
2173 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2174 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
2176 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
2179 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2180 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
2182 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
2185 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2186 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
2188 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2189 (int8x16_t) __b);
2192 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2193 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2195 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2196 (int16x8_t) __b);
2199 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2200 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2202 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2203 (int32x4_t) __b);
2206 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2207 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2209 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2212 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2213 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2215 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2218 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2219 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2221 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2224 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2225 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2227 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2228 (int8x8_t) __b);
2231 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2232 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2234 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2235 (int16x4_t) __b);
2238 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2239 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2241 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2242 (int32x2_t) __b);
2245 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2246 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2248 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2251 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2252 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2254 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2257 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2258 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2260 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2263 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2264 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2266 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2267 (int8x16_t) __b);
2270 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2271 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2273 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2274 (int16x8_t) __b);
2277 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2278 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2280 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2281 (int32x4_t) __b);
2284 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2285 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2287 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2290 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2291 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2293 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2296 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2297 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2299 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2302 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2303 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2305 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2308 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2309 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2311 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2314 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2315 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2317 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2320 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2321 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2323 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2326 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2327 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2329 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2332 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2333 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2335 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2338 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2339 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2341 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2344 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2345 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2347 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2350 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2351 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2353 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2356 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2357 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2359 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2362 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2363 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2365 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2368 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2369 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2371 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2374 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2375 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2377 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
2380 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2381 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2383 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2386 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2387 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2389 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2392 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2393 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2395 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2398 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2399 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2401 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2404 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2405 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2407 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2410 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2411 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2413 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2416 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2417 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2419 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2422 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2423 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2425 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2428 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2429 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2431 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2434 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2435 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2437 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2440 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2441 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2443 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2446 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2447 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2449 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2452 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2453 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2455 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2458 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2459 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2461 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2464 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2465 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2467 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2470 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2471 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2473 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
2476 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2477 vqneg_s8 (int8x8_t __a)
2479 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2482 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2483 vqneg_s16 (int16x4_t __a)
2485 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2488 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2489 vqneg_s32 (int32x2_t __a)
2491 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2494 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2495 vqneg_s64 (int64x1_t __a)
2497 return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
2500 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2501 vqnegq_s8 (int8x16_t __a)
2503 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2506 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2507 vqnegq_s16 (int16x8_t __a)
2509 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2512 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2513 vqnegq_s32 (int32x4_t __a)
2515 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2518 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2519 vqabs_s8 (int8x8_t __a)
2521 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2524 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2525 vqabs_s16 (int16x4_t __a)
2527 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2530 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2531 vqabs_s32 (int32x2_t __a)
2533 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2536 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2537 vqabs_s64 (int64x1_t __a)
2539 return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
2542 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2543 vqabsq_s8 (int8x16_t __a)
2545 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2548 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2549 vqabsq_s16 (int16x8_t __a)
2551 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2554 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2555 vqabsq_s32 (int32x4_t __a)
2557 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2560 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2561 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2563 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2566 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2567 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2569 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2572 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2573 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2575 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2578 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2579 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2581 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2584 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2585 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2587 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2590 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2591 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2593 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2596 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2597 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2599 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2602 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2603 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2605 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2608 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2609 vcreate_s8 (uint64_t __a)
2611 return (int8x8_t) __a;
2614 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2615 vcreate_s16 (uint64_t __a)
2617 return (int16x4_t) __a;
2620 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2621 vcreate_s32 (uint64_t __a)
2623 return (int32x2_t) __a;
2626 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2627 vcreate_s64 (uint64_t __a)
2629 return (int64x1_t) {__a};
2632 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2633 vcreate_f32 (uint64_t __a)
2635 return (float32x2_t) __a;
2638 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2639 vcreate_u8 (uint64_t __a)
2641 return (uint8x8_t) __a;
2644 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2645 vcreate_u16 (uint64_t __a)
2647 return (uint16x4_t) __a;
2650 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2651 vcreate_u32 (uint64_t __a)
2653 return (uint32x2_t) __a;
2656 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2657 vcreate_u64 (uint64_t __a)
2659 return (uint64x1_t) {__a};
2662 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2663 vcreate_f64 (uint64_t __a)
2665 return __builtin_aarch64_createv1df (__a);
2668 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2669 vcreate_p8 (uint64_t __a)
2671 return (poly8x8_t) __a;
2674 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2675 vcreate_p16 (uint64_t __a)
2677 return (poly16x4_t) __a;
2680 /* vget_lane */
2682 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2683 vget_lane_f32 (float32x2_t __a, const int __b)
2685 return __aarch64_vget_lane_f32 (__a, __b);
2688 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2689 vget_lane_f64 (float64x1_t __a, const int __b)
2691 return __aarch64_vget_lane_f64 (__a, __b);
2694 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2695 vget_lane_p8 (poly8x8_t __a, const int __b)
2697 return __aarch64_vget_lane_p8 (__a, __b);
2700 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2701 vget_lane_p16 (poly16x4_t __a, const int __b)
2703 return __aarch64_vget_lane_p16 (__a, __b);
2706 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2707 vget_lane_s8 (int8x8_t __a, const int __b)
2709 return __aarch64_vget_lane_s8 (__a, __b);
2712 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2713 vget_lane_s16 (int16x4_t __a, const int __b)
2715 return __aarch64_vget_lane_s16 (__a, __b);
2718 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2719 vget_lane_s32 (int32x2_t __a, const int __b)
2721 return __aarch64_vget_lane_s32 (__a, __b);
2724 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2725 vget_lane_s64 (int64x1_t __a, const int __b)
2727 return __aarch64_vget_lane_s64 (__a, __b);
2730 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2731 vget_lane_u8 (uint8x8_t __a, const int __b)
2733 return __aarch64_vget_lane_u8 (__a, __b);
2736 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2737 vget_lane_u16 (uint16x4_t __a, const int __b)
2739 return __aarch64_vget_lane_u16 (__a, __b);
2742 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2743 vget_lane_u32 (uint32x2_t __a, const int __b)
2745 return __aarch64_vget_lane_u32 (__a, __b);
2748 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2749 vget_lane_u64 (uint64x1_t __a, const int __b)
2751 return __aarch64_vget_lane_u64 (__a, __b);
2754 /* vgetq_lane */
2756 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2757 vgetq_lane_f32 (float32x4_t __a, const int __b)
2759 return __aarch64_vgetq_lane_f32 (__a, __b);
2762 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2763 vgetq_lane_f64 (float64x2_t __a, const int __b)
2765 return __aarch64_vgetq_lane_f64 (__a, __b);
2768 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2769 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2771 return __aarch64_vgetq_lane_p8 (__a, __b);
2774 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2775 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2777 return __aarch64_vgetq_lane_p16 (__a, __b);
2780 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2781 vgetq_lane_s8 (int8x16_t __a, const int __b)
2783 return __aarch64_vgetq_lane_s8 (__a, __b);
2786 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2787 vgetq_lane_s16 (int16x8_t __a, const int __b)
2789 return __aarch64_vgetq_lane_s16 (__a, __b);
2792 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2793 vgetq_lane_s32 (int32x4_t __a, const int __b)
2795 return __aarch64_vgetq_lane_s32 (__a, __b);
2798 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2799 vgetq_lane_s64 (int64x2_t __a, const int __b)
2801 return __aarch64_vgetq_lane_s64 (__a, __b);
2804 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2805 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2807 return __aarch64_vgetq_lane_u8 (__a, __b);
2810 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2811 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2813 return __aarch64_vgetq_lane_u16 (__a, __b);
2816 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2817 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2819 return __aarch64_vgetq_lane_u32 (__a, __b);
2822 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2823 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2825 return __aarch64_vgetq_lane_u64 (__a, __b);
2828 /* vreinterpret */
2830 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2831 vreinterpret_p8_f64 (float64x1_t __a)
2833 return (poly8x8_t) __a;
2836 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2837 vreinterpret_p8_s8 (int8x8_t __a)
2839 return (poly8x8_t) __a;
2842 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2843 vreinterpret_p8_s16 (int16x4_t __a)
2845 return (poly8x8_t) __a;
2848 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2849 vreinterpret_p8_s32 (int32x2_t __a)
2851 return (poly8x8_t) __a;
2854 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2855 vreinterpret_p8_s64 (int64x1_t __a)
2857 return (poly8x8_t) __a;
2860 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2861 vreinterpret_p8_f32 (float32x2_t __a)
2863 return (poly8x8_t) __a;
2866 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2867 vreinterpret_p8_u8 (uint8x8_t __a)
2869 return (poly8x8_t) __a;
2872 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2873 vreinterpret_p8_u16 (uint16x4_t __a)
2875 return (poly8x8_t) __a;
2878 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2879 vreinterpret_p8_u32 (uint32x2_t __a)
2881 return (poly8x8_t) __a;
2884 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2885 vreinterpret_p8_u64 (uint64x1_t __a)
2887 return (poly8x8_t) __a;
2890 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2891 vreinterpret_p8_p16 (poly16x4_t __a)
2893 return (poly8x8_t) __a;
2896 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2897 vreinterpretq_p8_f64 (float64x2_t __a)
2899 return (poly8x16_t) __a;
2902 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2903 vreinterpretq_p8_s8 (int8x16_t __a)
2905 return (poly8x16_t) __a;
2908 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2909 vreinterpretq_p8_s16 (int16x8_t __a)
2911 return (poly8x16_t) __a;
2914 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2915 vreinterpretq_p8_s32 (int32x4_t __a)
2917 return (poly8x16_t) __a;
2920 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2921 vreinterpretq_p8_s64 (int64x2_t __a)
2923 return (poly8x16_t) __a;
2926 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2927 vreinterpretq_p8_f32 (float32x4_t __a)
2929 return (poly8x16_t) __a;
2932 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2933 vreinterpretq_p8_u8 (uint8x16_t __a)
2935 return (poly8x16_t) __a;
2938 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2939 vreinterpretq_p8_u16 (uint16x8_t __a)
2941 return (poly8x16_t) __a;
2944 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2945 vreinterpretq_p8_u32 (uint32x4_t __a)
2947 return (poly8x16_t) __a;
2950 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2951 vreinterpretq_p8_u64 (uint64x2_t __a)
2953 return (poly8x16_t) __a;
2956 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2957 vreinterpretq_p8_p16 (poly16x8_t __a)
2959 return (poly8x16_t) __a;
2962 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2963 vreinterpret_p16_f64 (float64x1_t __a)
2965 return (poly16x4_t) __a;
2968 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2969 vreinterpret_p16_s8 (int8x8_t __a)
2971 return (poly16x4_t) __a;
2974 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2975 vreinterpret_p16_s16 (int16x4_t __a)
2977 return (poly16x4_t) __a;
2980 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2981 vreinterpret_p16_s32 (int32x2_t __a)
2983 return (poly16x4_t) __a;
2986 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2987 vreinterpret_p16_s64 (int64x1_t __a)
2989 return (poly16x4_t) __a;
2992 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2993 vreinterpret_p16_f32 (float32x2_t __a)
2995 return (poly16x4_t) __a;
2998 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2999 vreinterpret_p16_u8 (uint8x8_t __a)
3001 return (poly16x4_t) __a;
3004 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3005 vreinterpret_p16_u16 (uint16x4_t __a)
3007 return (poly16x4_t) __a;
3010 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3011 vreinterpret_p16_u32 (uint32x2_t __a)
3013 return (poly16x4_t) __a;
3016 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3017 vreinterpret_p16_u64 (uint64x1_t __a)
3019 return (poly16x4_t) __a;
3022 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3023 vreinterpret_p16_p8 (poly8x8_t __a)
3025 return (poly16x4_t) __a;
3028 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3029 vreinterpretq_p16_f64 (float64x2_t __a)
3031 return (poly16x8_t) __a;
3034 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3035 vreinterpretq_p16_s8 (int8x16_t __a)
3037 return (poly16x8_t) __a;
3040 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3041 vreinterpretq_p16_s16 (int16x8_t __a)
3043 return (poly16x8_t) __a;
3046 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3047 vreinterpretq_p16_s32 (int32x4_t __a)
3049 return (poly16x8_t) __a;
3052 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3053 vreinterpretq_p16_s64 (int64x2_t __a)
3055 return (poly16x8_t) __a;
3058 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3059 vreinterpretq_p16_f32 (float32x4_t __a)
3061 return (poly16x8_t) __a;
3064 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3065 vreinterpretq_p16_u8 (uint8x16_t __a)
3067 return (poly16x8_t) __a;
3070 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3071 vreinterpretq_p16_u16 (uint16x8_t __a)
3073 return (poly16x8_t) __a;
3076 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3077 vreinterpretq_p16_u32 (uint32x4_t __a)
3079 return (poly16x8_t) __a;
3082 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3083 vreinterpretq_p16_u64 (uint64x2_t __a)
3085 return (poly16x8_t) __a;
3088 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3089 vreinterpretq_p16_p8 (poly8x16_t __a)
3091 return (poly16x8_t) __a;
3094 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3095 vreinterpret_f32_f64 (float64x1_t __a)
3097 return (float32x2_t) __a;
3100 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3101 vreinterpret_f32_s8 (int8x8_t __a)
3103 return (float32x2_t) __a;
3106 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3107 vreinterpret_f32_s16 (int16x4_t __a)
3109 return (float32x2_t) __a;
3112 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3113 vreinterpret_f32_s32 (int32x2_t __a)
3115 return (float32x2_t) __a;
3118 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3119 vreinterpret_f32_s64 (int64x1_t __a)
3121 return (float32x2_t) __a;
3124 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3125 vreinterpret_f32_u8 (uint8x8_t __a)
3127 return (float32x2_t) __a;
3130 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3131 vreinterpret_f32_u16 (uint16x4_t __a)
3133 return (float32x2_t) __a;
3136 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3137 vreinterpret_f32_u32 (uint32x2_t __a)
3139 return (float32x2_t) __a;
3142 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3143 vreinterpret_f32_u64 (uint64x1_t __a)
3145 return (float32x2_t) __a;
3148 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3149 vreinterpret_f32_p8 (poly8x8_t __a)
3151 return (float32x2_t) __a;
3154 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3155 vreinterpret_f32_p16 (poly16x4_t __a)
3157 return (float32x2_t) __a;
3160 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3161 vreinterpretq_f32_f64 (float64x2_t __a)
3163 return (float32x4_t) __a;
3166 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3167 vreinterpretq_f32_s8 (int8x16_t __a)
3169 return (float32x4_t) __a;
3172 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3173 vreinterpretq_f32_s16 (int16x8_t __a)
3175 return (float32x4_t) __a;
3178 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3179 vreinterpretq_f32_s32 (int32x4_t __a)
3181 return (float32x4_t) __a;
3184 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3185 vreinterpretq_f32_s64 (int64x2_t __a)
3187 return (float32x4_t) __a;
3190 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3191 vreinterpretq_f32_u8 (uint8x16_t __a)
3193 return (float32x4_t) __a;
3196 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3197 vreinterpretq_f32_u16 (uint16x8_t __a)
3199 return (float32x4_t) __a;
3202 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3203 vreinterpretq_f32_u32 (uint32x4_t __a)
3205 return (float32x4_t) __a;
3208 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3209 vreinterpretq_f32_u64 (uint64x2_t __a)
3211 return (float32x4_t) __a;
3214 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3215 vreinterpretq_f32_p8 (poly8x16_t __a)
3217 return (float32x4_t) __a;
3220 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3221 vreinterpretq_f32_p16 (poly16x8_t __a)
3223 return (float32x4_t) __a;
3226 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3227 vreinterpret_f64_f32 (float32x2_t __a)
3229 return (float64x1_t) __a;
3232 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3233 vreinterpret_f64_p8 (poly8x8_t __a)
3235 return (float64x1_t) __a;
3238 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3239 vreinterpret_f64_p16 (poly16x4_t __a)
3241 return (float64x1_t) __a;
3244 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3245 vreinterpret_f64_s8 (int8x8_t __a)
3247 return (float64x1_t) __a;
3250 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3251 vreinterpret_f64_s16 (int16x4_t __a)
3253 return (float64x1_t) __a;
3256 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3257 vreinterpret_f64_s32 (int32x2_t __a)
3259 return (float64x1_t) __a;
3262 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3263 vreinterpret_f64_s64 (int64x1_t __a)
3265 return __builtin_aarch64_createv1df ((uint64_t) vget_lane_s64 (__a, 0));
3268 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3269 vreinterpret_f64_u8 (uint8x8_t __a)
3271 return (float64x1_t) __a;
3274 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3275 vreinterpret_f64_u16 (uint16x4_t __a)
3277 return (float64x1_t) __a;
3280 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3281 vreinterpret_f64_u32 (uint32x2_t __a)
3283 return (float64x1_t) __a;
3286 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3287 vreinterpret_f64_u64 (uint64x1_t __a)
3289 return __builtin_aarch64_createv1df (vget_lane_u64 (__a, 0));
3292 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3293 vreinterpretq_f64_f32 (float32x4_t __a)
3295 return (float64x2_t) __a;
3298 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3299 vreinterpretq_f64_p8 (poly8x16_t __a)
3301 return (float64x2_t) __a;
3304 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3305 vreinterpretq_f64_p16 (poly16x8_t __a)
3307 return (float64x2_t) __a;
3310 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3311 vreinterpretq_f64_s8 (int8x16_t __a)
3313 return (float64x2_t) __a;
3316 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3317 vreinterpretq_f64_s16 (int16x8_t __a)
3319 return (float64x2_t) __a;
3322 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3323 vreinterpretq_f64_s32 (int32x4_t __a)
3325 return (float64x2_t) __a;
3328 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3329 vreinterpretq_f64_s64 (int64x2_t __a)
3331 return (float64x2_t) __a;
3334 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3335 vreinterpretq_f64_u8 (uint8x16_t __a)
3337 return (float64x2_t) __a;
3340 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3341 vreinterpretq_f64_u16 (uint16x8_t __a)
3343 return (float64x2_t) __a;
3346 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3347 vreinterpretq_f64_u32 (uint32x4_t __a)
3349 return (float64x2_t) __a;
3352 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3353 vreinterpretq_f64_u64 (uint64x2_t __a)
3355 return (float64x2_t) __a;
3358 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3359 vreinterpret_s64_f64 (float64x1_t __a)
3361 return (int64x1_t) __a;
3364 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3365 vreinterpret_s64_s8 (int8x8_t __a)
3367 return (int64x1_t) __a;
3370 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3371 vreinterpret_s64_s16 (int16x4_t __a)
3373 return (int64x1_t) __a;
3376 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3377 vreinterpret_s64_s32 (int32x2_t __a)
3379 return (int64x1_t) __a;
3382 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3383 vreinterpret_s64_f32 (float32x2_t __a)
3385 return (int64x1_t) __a;
3388 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3389 vreinterpret_s64_u8 (uint8x8_t __a)
3391 return (int64x1_t) __a;
3394 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3395 vreinterpret_s64_u16 (uint16x4_t __a)
3397 return (int64x1_t) __a;
3400 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3401 vreinterpret_s64_u32 (uint32x2_t __a)
3403 return (int64x1_t) __a;
3406 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3407 vreinterpret_s64_u64 (uint64x1_t __a)
3409 return (int64x1_t) __a;
3412 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3413 vreinterpret_s64_p8 (poly8x8_t __a)
3415 return (int64x1_t) __a;
3418 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3419 vreinterpret_s64_p16 (poly16x4_t __a)
3421 return (int64x1_t) __a;
3424 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3425 vreinterpretq_s64_f64 (float64x2_t __a)
3427 return (int64x2_t) __a;
3430 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3431 vreinterpretq_s64_s8 (int8x16_t __a)
3433 return (int64x2_t) __a;
3436 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3437 vreinterpretq_s64_s16 (int16x8_t __a)
3439 return (int64x2_t) __a;
3442 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3443 vreinterpretq_s64_s32 (int32x4_t __a)
3445 return (int64x2_t) __a;
3448 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3449 vreinterpretq_s64_f32 (float32x4_t __a)
3451 return (int64x2_t) __a;
3454 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3455 vreinterpretq_s64_u8 (uint8x16_t __a)
3457 return (int64x2_t) __a;
3460 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3461 vreinterpretq_s64_u16 (uint16x8_t __a)
3463 return (int64x2_t) __a;
3466 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3467 vreinterpretq_s64_u32 (uint32x4_t __a)
3469 return (int64x2_t) __a;
3472 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3473 vreinterpretq_s64_u64 (uint64x2_t __a)
3475 return (int64x2_t) __a;
3478 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3479 vreinterpretq_s64_p8 (poly8x16_t __a)
3481 return (int64x2_t) __a;
3484 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3485 vreinterpretq_s64_p16 (poly16x8_t __a)
3487 return (int64x2_t) __a;
3490 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3491 vreinterpret_u64_f64 (float64x1_t __a)
3493 return (uint64x1_t) __a;
3496 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3497 vreinterpret_u64_s8 (int8x8_t __a)
3499 return (uint64x1_t) __a;
3502 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3503 vreinterpret_u64_s16 (int16x4_t __a)
3505 return (uint64x1_t) __a;
3508 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3509 vreinterpret_u64_s32 (int32x2_t __a)
3511 return (uint64x1_t) __a;
3514 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3515 vreinterpret_u64_s64 (int64x1_t __a)
3517 return (uint64x1_t) __a;
3520 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3521 vreinterpret_u64_f32 (float32x2_t __a)
3523 return (uint64x1_t) __a;
3526 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3527 vreinterpret_u64_u8 (uint8x8_t __a)
3529 return (uint64x1_t) __a;
3532 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3533 vreinterpret_u64_u16 (uint16x4_t __a)
3535 return (uint64x1_t) __a;
3538 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3539 vreinterpret_u64_u32 (uint32x2_t __a)
3541 return (uint64x1_t) __a;
3544 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3545 vreinterpret_u64_p8 (poly8x8_t __a)
3547 return (uint64x1_t) __a;
3550 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3551 vreinterpret_u64_p16 (poly16x4_t __a)
3553 return (uint64x1_t) __a;
3556 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3557 vreinterpretq_u64_f64 (float64x2_t __a)
3559 return (uint64x2_t) __a;
3562 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3563 vreinterpretq_u64_s8 (int8x16_t __a)
3565 return (uint64x2_t) __a;
3568 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3569 vreinterpretq_u64_s16 (int16x8_t __a)
3571 return (uint64x2_t) __a;
3574 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3575 vreinterpretq_u64_s32 (int32x4_t __a)
3577 return (uint64x2_t) __a;
3580 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3581 vreinterpretq_u64_s64 (int64x2_t __a)
3583 return (uint64x2_t) __a;
3586 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3587 vreinterpretq_u64_f32 (float32x4_t __a)
3589 return (uint64x2_t) __a;
3592 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3593 vreinterpretq_u64_u8 (uint8x16_t __a)
3595 return (uint64x2_t) __a;
3598 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3599 vreinterpretq_u64_u16 (uint16x8_t __a)
3601 return (uint64x2_t) __a;
3604 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3605 vreinterpretq_u64_u32 (uint32x4_t __a)
3607 return (uint64x2_t) __a;
3610 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3611 vreinterpretq_u64_p8 (poly8x16_t __a)
3613 return (uint64x2_t) __a;
3616 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3617 vreinterpretq_u64_p16 (poly16x8_t __a)
3619 return (uint64x2_t) __a;
3622 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3623 vreinterpret_s8_f64 (float64x1_t __a)
3625 return (int8x8_t) __a;
3628 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3629 vreinterpret_s8_s16 (int16x4_t __a)
3631 return (int8x8_t) __a;
3634 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3635 vreinterpret_s8_s32 (int32x2_t __a)
3637 return (int8x8_t) __a;
3640 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3641 vreinterpret_s8_s64 (int64x1_t __a)
3643 return (int8x8_t) __a;
3646 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3647 vreinterpret_s8_f32 (float32x2_t __a)
3649 return (int8x8_t) __a;
3652 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3653 vreinterpret_s8_u8 (uint8x8_t __a)
3655 return (int8x8_t) __a;
3658 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3659 vreinterpret_s8_u16 (uint16x4_t __a)
3661 return (int8x8_t) __a;
3664 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3665 vreinterpret_s8_u32 (uint32x2_t __a)
3667 return (int8x8_t) __a;
3670 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3671 vreinterpret_s8_u64 (uint64x1_t __a)
3673 return (int8x8_t) __a;
3676 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3677 vreinterpret_s8_p8 (poly8x8_t __a)
3679 return (int8x8_t) __a;
3682 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3683 vreinterpret_s8_p16 (poly16x4_t __a)
3685 return (int8x8_t) __a;
3688 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3689 vreinterpretq_s8_f64 (float64x2_t __a)
3691 return (int8x16_t) __a;
3694 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3695 vreinterpretq_s8_s16 (int16x8_t __a)
3697 return (int8x16_t) __a;
3700 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3701 vreinterpretq_s8_s32 (int32x4_t __a)
3703 return (int8x16_t) __a;
3706 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3707 vreinterpretq_s8_s64 (int64x2_t __a)
3709 return (int8x16_t) __a;
3712 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3713 vreinterpretq_s8_f32 (float32x4_t __a)
3715 return (int8x16_t) __a;
3718 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3719 vreinterpretq_s8_u8 (uint8x16_t __a)
3721 return (int8x16_t) __a;
3724 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3725 vreinterpretq_s8_u16 (uint16x8_t __a)
3727 return (int8x16_t) __a;
3730 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3731 vreinterpretq_s8_u32 (uint32x4_t __a)
3733 return (int8x16_t) __a;
3736 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3737 vreinterpretq_s8_u64 (uint64x2_t __a)
3739 return (int8x16_t) __a;
3742 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3743 vreinterpretq_s8_p8 (poly8x16_t __a)
3745 return (int8x16_t) __a;
3748 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3749 vreinterpretq_s8_p16 (poly16x8_t __a)
3751 return (int8x16_t) __a;
3754 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3755 vreinterpret_s16_f64 (float64x1_t __a)
3757 return (int16x4_t) __a;
3760 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3761 vreinterpret_s16_s8 (int8x8_t __a)
3763 return (int16x4_t) __a;
3766 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3767 vreinterpret_s16_s32 (int32x2_t __a)
3769 return (int16x4_t) __a;
3772 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3773 vreinterpret_s16_s64 (int64x1_t __a)
3775 return (int16x4_t) __a;
3778 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3779 vreinterpret_s16_f32 (float32x2_t __a)
3781 return (int16x4_t) __a;
3784 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3785 vreinterpret_s16_u8 (uint8x8_t __a)
3787 return (int16x4_t) __a;
3790 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3791 vreinterpret_s16_u16 (uint16x4_t __a)
3793 return (int16x4_t) __a;
3796 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3797 vreinterpret_s16_u32 (uint32x2_t __a)
3799 return (int16x4_t) __a;
3802 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3803 vreinterpret_s16_u64 (uint64x1_t __a)
3805 return (int16x4_t) __a;
3808 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3809 vreinterpret_s16_p8 (poly8x8_t __a)
3811 return (int16x4_t) __a;
3814 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3815 vreinterpret_s16_p16 (poly16x4_t __a)
3817 return (int16x4_t) __a;
3820 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3821 vreinterpretq_s16_f64 (float64x2_t __a)
3823 return (int16x8_t) __a;
3826 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3827 vreinterpretq_s16_s8 (int8x16_t __a)
3829 return (int16x8_t) __a;
3832 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3833 vreinterpretq_s16_s32 (int32x4_t __a)
3835 return (int16x8_t) __a;
3838 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3839 vreinterpretq_s16_s64 (int64x2_t __a)
3841 return (int16x8_t) __a;
3844 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3845 vreinterpretq_s16_f32 (float32x4_t __a)
3847 return (int16x8_t) __a;
3850 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3851 vreinterpretq_s16_u8 (uint8x16_t __a)
3853 return (int16x8_t) __a;
3856 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3857 vreinterpretq_s16_u16 (uint16x8_t __a)
3859 return (int16x8_t) __a;
3862 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3863 vreinterpretq_s16_u32 (uint32x4_t __a)
3865 return (int16x8_t) __a;
3868 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3869 vreinterpretq_s16_u64 (uint64x2_t __a)
3871 return (int16x8_t) __a;
3874 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3875 vreinterpretq_s16_p8 (poly8x16_t __a)
3877 return (int16x8_t) __a;
3880 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3881 vreinterpretq_s16_p16 (poly16x8_t __a)
3883 return (int16x8_t) __a;
3886 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3887 vreinterpret_s32_f64 (float64x1_t __a)
3889 return (int32x2_t) __a;
3892 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3893 vreinterpret_s32_s8 (int8x8_t __a)
3895 return (int32x2_t) __a;
3898 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3899 vreinterpret_s32_s16 (int16x4_t __a)
3901 return (int32x2_t) __a;
3904 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3905 vreinterpret_s32_s64 (int64x1_t __a)
3907 return (int32x2_t) __a;
3910 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3911 vreinterpret_s32_f32 (float32x2_t __a)
3913 return (int32x2_t) __a;
3916 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3917 vreinterpret_s32_u8 (uint8x8_t __a)
3919 return (int32x2_t) __a;
3922 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3923 vreinterpret_s32_u16 (uint16x4_t __a)
3925 return (int32x2_t) __a;
3928 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3929 vreinterpret_s32_u32 (uint32x2_t __a)
3931 return (int32x2_t) __a;
3934 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3935 vreinterpret_s32_u64 (uint64x1_t __a)
3937 return (int32x2_t) __a;
3940 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3941 vreinterpret_s32_p8 (poly8x8_t __a)
3943 return (int32x2_t) __a;
3946 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3947 vreinterpret_s32_p16 (poly16x4_t __a)
3949 return (int32x2_t) __a;
3952 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3953 vreinterpretq_s32_f64 (float64x2_t __a)
3955 return (int32x4_t) __a;
3958 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3959 vreinterpretq_s32_s8 (int8x16_t __a)
3961 return (int32x4_t) __a;
3964 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3965 vreinterpretq_s32_s16 (int16x8_t __a)
3967 return (int32x4_t) __a;
3970 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3971 vreinterpretq_s32_s64 (int64x2_t __a)
3973 return (int32x4_t) __a;
3976 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3977 vreinterpretq_s32_f32 (float32x4_t __a)
3979 return (int32x4_t) __a;
3982 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3983 vreinterpretq_s32_u8 (uint8x16_t __a)
3985 return (int32x4_t) __a;
3988 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3989 vreinterpretq_s32_u16 (uint16x8_t __a)
3991 return (int32x4_t) __a;
3994 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3995 vreinterpretq_s32_u32 (uint32x4_t __a)
3997 return (int32x4_t) __a;
4000 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4001 vreinterpretq_s32_u64 (uint64x2_t __a)
4003 return (int32x4_t) __a;
4006 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4007 vreinterpretq_s32_p8 (poly8x16_t __a)
4009 return (int32x4_t) __a;
4012 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4013 vreinterpretq_s32_p16 (poly16x8_t __a)
4015 return (int32x4_t) __a;
4018 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4019 vreinterpret_u8_f64 (float64x1_t __a)
4021 return (uint8x8_t) __a;
4024 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4025 vreinterpret_u8_s8 (int8x8_t __a)
4027 return (uint8x8_t) __a;
4030 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4031 vreinterpret_u8_s16 (int16x4_t __a)
4033 return (uint8x8_t) __a;
4036 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4037 vreinterpret_u8_s32 (int32x2_t __a)
4039 return (uint8x8_t) __a;
4042 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4043 vreinterpret_u8_s64 (int64x1_t __a)
4045 return (uint8x8_t) __a;
4048 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4049 vreinterpret_u8_f32 (float32x2_t __a)
4051 return (uint8x8_t) __a;
4054 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4055 vreinterpret_u8_u16 (uint16x4_t __a)
4057 return (uint8x8_t) __a;
4060 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4061 vreinterpret_u8_u32 (uint32x2_t __a)
4063 return (uint8x8_t) __a;
4066 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4067 vreinterpret_u8_u64 (uint64x1_t __a)
4069 return (uint8x8_t) __a;
4072 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4073 vreinterpret_u8_p8 (poly8x8_t __a)
4075 return (uint8x8_t) __a;
4078 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4079 vreinterpret_u8_p16 (poly16x4_t __a)
4081 return (uint8x8_t) __a;
4084 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4085 vreinterpretq_u8_f64 (float64x2_t __a)
4087 return (uint8x16_t) __a;
4090 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4091 vreinterpretq_u8_s8 (int8x16_t __a)
4093 return (uint8x16_t) __a;
4096 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4097 vreinterpretq_u8_s16 (int16x8_t __a)
4099 return (uint8x16_t) __a;
4102 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4103 vreinterpretq_u8_s32 (int32x4_t __a)
4105 return (uint8x16_t) __a;
4108 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4109 vreinterpretq_u8_s64 (int64x2_t __a)
4111 return (uint8x16_t) __a;
4114 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4115 vreinterpretq_u8_f32 (float32x4_t __a)
4117 return (uint8x16_t) __a;
4120 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4121 vreinterpretq_u8_u16 (uint16x8_t __a)
4123 return (uint8x16_t) __a;
4126 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4127 vreinterpretq_u8_u32 (uint32x4_t __a)
4129 return (uint8x16_t) __a;
4132 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4133 vreinterpretq_u8_u64 (uint64x2_t __a)
4135 return (uint8x16_t) __a;
4138 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4139 vreinterpretq_u8_p8 (poly8x16_t __a)
4141 return (uint8x16_t) __a;
4144 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4145 vreinterpretq_u8_p16 (poly16x8_t __a)
4147 return (uint8x16_t) __a;
4150 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4151 vreinterpret_u16_f64 (float64x1_t __a)
4153 return (uint16x4_t) __a;
4156 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4157 vreinterpret_u16_s8 (int8x8_t __a)
4159 return (uint16x4_t) __a;
4162 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4163 vreinterpret_u16_s16 (int16x4_t __a)
4165 return (uint16x4_t) __a;
4168 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4169 vreinterpret_u16_s32 (int32x2_t __a)
4171 return (uint16x4_t) __a;
4174 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4175 vreinterpret_u16_s64 (int64x1_t __a)
4177 return (uint16x4_t) __a;
4180 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4181 vreinterpret_u16_f32 (float32x2_t __a)
4183 return (uint16x4_t) __a;
4186 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4187 vreinterpret_u16_u8 (uint8x8_t __a)
4189 return (uint16x4_t) __a;
4192 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4193 vreinterpret_u16_u32 (uint32x2_t __a)
4195 return (uint16x4_t) __a;
4198 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4199 vreinterpret_u16_u64 (uint64x1_t __a)
4201 return (uint16x4_t) __a;
4204 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4205 vreinterpret_u16_p8 (poly8x8_t __a)
4207 return (uint16x4_t) __a;
4210 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4211 vreinterpret_u16_p16 (poly16x4_t __a)
4213 return (uint16x4_t) __a;
4216 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4217 vreinterpretq_u16_f64 (float64x2_t __a)
4219 return (uint16x8_t) __a;
4222 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4223 vreinterpretq_u16_s8 (int8x16_t __a)
4225 return (uint16x8_t) __a;
4228 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4229 vreinterpretq_u16_s16 (int16x8_t __a)
4231 return (uint16x8_t) __a;
4234 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4235 vreinterpretq_u16_s32 (int32x4_t __a)
4237 return (uint16x8_t) __a;
4240 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4241 vreinterpretq_u16_s64 (int64x2_t __a)
4243 return (uint16x8_t) __a;
4246 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4247 vreinterpretq_u16_f32 (float32x4_t __a)
4249 return (uint16x8_t) __a;
4252 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4253 vreinterpretq_u16_u8 (uint8x16_t __a)
4255 return (uint16x8_t) __a;
4258 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4259 vreinterpretq_u16_u32 (uint32x4_t __a)
4261 return (uint16x8_t) __a;
4264 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4265 vreinterpretq_u16_u64 (uint64x2_t __a)
4267 return (uint16x8_t) __a;
4270 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4271 vreinterpretq_u16_p8 (poly8x16_t __a)
4273 return (uint16x8_t) __a;
4276 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4277 vreinterpretq_u16_p16 (poly16x8_t __a)
4279 return (uint16x8_t) __a;
4282 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4283 vreinterpret_u32_f64 (float64x1_t __a)
4285 return (uint32x2_t) __a;
4288 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4289 vreinterpret_u32_s8 (int8x8_t __a)
4291 return (uint32x2_t) __a;
4294 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4295 vreinterpret_u32_s16 (int16x4_t __a)
4297 return (uint32x2_t) __a;
4300 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4301 vreinterpret_u32_s32 (int32x2_t __a)
4303 return (uint32x2_t) __a;
4306 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4307 vreinterpret_u32_s64 (int64x1_t __a)
4309 return (uint32x2_t) __a;
4312 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4313 vreinterpret_u32_f32 (float32x2_t __a)
4315 return (uint32x2_t) __a;
4318 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4319 vreinterpret_u32_u8 (uint8x8_t __a)
4321 return (uint32x2_t) __a;
4324 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4325 vreinterpret_u32_u16 (uint16x4_t __a)
4327 return (uint32x2_t) __a;
4330 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4331 vreinterpret_u32_u64 (uint64x1_t __a)
4333 return (uint32x2_t) __a;
4336 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4337 vreinterpret_u32_p8 (poly8x8_t __a)
4339 return (uint32x2_t) __a;
4342 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4343 vreinterpret_u32_p16 (poly16x4_t __a)
4345 return (uint32x2_t) __a;
4348 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4349 vreinterpretq_u32_f64 (float64x2_t __a)
4351 return (uint32x4_t) __a;
4354 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4355 vreinterpretq_u32_s8 (int8x16_t __a)
4357 return (uint32x4_t) __a;
4360 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4361 vreinterpretq_u32_s16 (int16x8_t __a)
4363 return (uint32x4_t) __a;
4366 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4367 vreinterpretq_u32_s32 (int32x4_t __a)
4369 return (uint32x4_t) __a;
4372 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4373 vreinterpretq_u32_s64 (int64x2_t __a)
4375 return (uint32x4_t) __a;
4378 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4379 vreinterpretq_u32_f32 (float32x4_t __a)
4381 return (uint32x4_t) __a;
4384 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4385 vreinterpretq_u32_u8 (uint8x16_t __a)
4387 return (uint32x4_t) __a;
4390 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4391 vreinterpretq_u32_u16 (uint16x8_t __a)
4393 return (uint32x4_t) __a;
4396 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4397 vreinterpretq_u32_u64 (uint64x2_t __a)
4399 return (uint32x4_t) __a;
4402 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4403 vreinterpretq_u32_p8 (poly8x16_t __a)
4405 return (uint32x4_t) __a;
4408 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4409 vreinterpretq_u32_p16 (poly16x8_t __a)
4411 return (uint32x4_t) __a;
4414 #define __GET_LOW(__TYPE) \
4415 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
4416 uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \
4417 return vreinterpret_##__TYPE##_u64 (lo);
4419 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4420 vget_low_f32 (float32x4_t __a)
4422 __GET_LOW (f32);
4425 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4426 vget_low_f64 (float64x2_t __a)
4428 return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
4431 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4432 vget_low_p8 (poly8x16_t __a)
4434 __GET_LOW (p8);
4437 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4438 vget_low_p16 (poly16x8_t __a)
4440 __GET_LOW (p16);
4443 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4444 vget_low_s8 (int8x16_t __a)
4446 __GET_LOW (s8);
4449 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4450 vget_low_s16 (int16x8_t __a)
4452 __GET_LOW (s16);
4455 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4456 vget_low_s32 (int32x4_t __a)
4458 __GET_LOW (s32);
4461 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4462 vget_low_s64 (int64x2_t __a)
4464 __GET_LOW (s64);
4467 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4468 vget_low_u8 (uint8x16_t __a)
4470 __GET_LOW (u8);
4473 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4474 vget_low_u16 (uint16x8_t __a)
4476 __GET_LOW (u16);
4479 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4480 vget_low_u32 (uint32x4_t __a)
4482 __GET_LOW (u32);
4485 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4486 vget_low_u64 (uint64x2_t __a)
4488 return vcreate_u64 (vgetq_lane_u64 (__a, 0));
4491 #undef __GET_LOW
4493 #define __GET_HIGH(__TYPE) \
4494 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
4495 uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \
4496 return vreinterpret_##__TYPE##_u64 (hi);
4498 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4499 vget_high_f32 (float32x4_t __a)
4501 __GET_HIGH (f32);
4504 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4505 vget_high_f64 (float64x2_t __a)
4507 __GET_HIGH (f64);
4510 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4511 vget_high_p8 (poly8x16_t __a)
4513 __GET_HIGH (p8);
4516 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4517 vget_high_p16 (poly16x8_t __a)
4519 __GET_HIGH (p16);
4522 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4523 vget_high_s8 (int8x16_t __a)
4525 __GET_HIGH (s8);
4528 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4529 vget_high_s16 (int16x8_t __a)
4531 __GET_HIGH (s16);
4534 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4535 vget_high_s32 (int32x4_t __a)
4537 __GET_HIGH (s32);
4540 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4541 vget_high_s64 (int64x2_t __a)
4543 __GET_HIGH (s64);
4546 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4547 vget_high_u8 (uint8x16_t __a)
4549 __GET_HIGH (u8);
4552 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4553 vget_high_u16 (uint16x8_t __a)
4555 __GET_HIGH (u16);
4558 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4559 vget_high_u32 (uint32x4_t __a)
4561 __GET_HIGH (u32);
4564 #undef __GET_HIGH
4566 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4567 vget_high_u64 (uint64x2_t __a)
4569 return vcreate_u64 (vgetq_lane_u64 (__a, 1));
4572 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4573 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4575 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4578 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4579 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4581 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4584 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4585 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4587 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4590 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4591 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4593 return __builtin_aarch64_combinedi (__a[0], __b[0]);
4596 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4597 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4599 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4602 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4603 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4605 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4606 (int8x8_t) __b);
4609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4610 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4612 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4613 (int16x4_t) __b);
4616 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4617 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4619 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4620 (int32x2_t) __b);
4623 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4624 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4626 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
4629 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4630 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4632 return __builtin_aarch64_combinedf (__a[0], __b[0]);
4635 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4636 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4638 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4639 (int8x8_t) __b);
4642 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4643 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4645 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4646 (int16x4_t) __b);
/* Start of temporary inline asm implementations.  */
4651 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4652 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4654 int8x8_t result;
4655 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4656 : "=w"(result)
4657 : "0"(a), "w"(b), "w"(c)
4658 : /* No clobbers */);
4659 return result;
4662 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4663 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4665 int16x4_t result;
4666 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4667 : "=w"(result)
4668 : "0"(a), "w"(b), "w"(c)
4669 : /* No clobbers */);
4670 return result;
4673 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4674 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4676 int32x2_t result;
4677 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4678 : "=w"(result)
4679 : "0"(a), "w"(b), "w"(c)
4680 : /* No clobbers */);
4681 return result;
4684 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4685 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4687 uint8x8_t result;
4688 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4689 : "=w"(result)
4690 : "0"(a), "w"(b), "w"(c)
4691 : /* No clobbers */);
4692 return result;
4695 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4696 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4698 uint16x4_t result;
4699 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4700 : "=w"(result)
4701 : "0"(a), "w"(b), "w"(c)
4702 : /* No clobbers */);
4703 return result;
4706 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4707 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4709 uint32x2_t result;
4710 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4711 : "=w"(result)
4712 : "0"(a), "w"(b), "w"(c)
4713 : /* No clobbers */);
4714 return result;
4717 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4718 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4720 int16x8_t result;
4721 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4722 : "=w"(result)
4723 : "0"(a), "w"(b), "w"(c)
4724 : /* No clobbers */);
4725 return result;
4728 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4729 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4731 int32x4_t result;
4732 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4733 : "=w"(result)
4734 : "0"(a), "w"(b), "w"(c)
4735 : /* No clobbers */);
4736 return result;
4739 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4740 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4742 int64x2_t result;
4743 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4744 : "=w"(result)
4745 : "0"(a), "w"(b), "w"(c)
4746 : /* No clobbers */);
4747 return result;
4750 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4751 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4753 uint16x8_t result;
4754 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4755 : "=w"(result)
4756 : "0"(a), "w"(b), "w"(c)
4757 : /* No clobbers */);
4758 return result;
4761 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4762 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4764 uint32x4_t result;
4765 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4766 : "=w"(result)
4767 : "0"(a), "w"(b), "w"(c)
4768 : /* No clobbers */);
4769 return result;
4772 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4773 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4775 uint64x2_t result;
4776 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4777 : "=w"(result)
4778 : "0"(a), "w"(b), "w"(c)
4779 : /* No clobbers */);
4780 return result;
4783 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4784 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4786 int16x8_t result;
4787 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4788 : "=w"(result)
4789 : "0"(a), "w"(b), "w"(c)
4790 : /* No clobbers */);
4791 return result;
4794 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4795 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4797 int32x4_t result;
4798 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4799 : "=w"(result)
4800 : "0"(a), "w"(b), "w"(c)
4801 : /* No clobbers */);
4802 return result;
4805 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4806 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4808 int64x2_t result;
4809 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4810 : "=w"(result)
4811 : "0"(a), "w"(b), "w"(c)
4812 : /* No clobbers */);
4813 return result;
4816 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4817 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4819 uint16x8_t result;
4820 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4821 : "=w"(result)
4822 : "0"(a), "w"(b), "w"(c)
4823 : /* No clobbers */);
4824 return result;
4827 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4828 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4830 uint32x4_t result;
4831 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4832 : "=w"(result)
4833 : "0"(a), "w"(b), "w"(c)
4834 : /* No clobbers */);
4835 return result;
4838 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4839 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4841 uint64x2_t result;
4842 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4843 : "=w"(result)
4844 : "0"(a), "w"(b), "w"(c)
4845 : /* No clobbers */);
4846 return result;
4849 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4850 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4852 int8x16_t result;
4853 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4854 : "=w"(result)
4855 : "0"(a), "w"(b), "w"(c)
4856 : /* No clobbers */);
4857 return result;
4860 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4861 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4863 int16x8_t result;
4864 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4865 : "=w"(result)
4866 : "0"(a), "w"(b), "w"(c)
4867 : /* No clobbers */);
4868 return result;
4871 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4872 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4874 int32x4_t result;
4875 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4876 : "=w"(result)
4877 : "0"(a), "w"(b), "w"(c)
4878 : /* No clobbers */);
4879 return result;
4882 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4883 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4885 uint8x16_t result;
4886 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4887 : "=w"(result)
4888 : "0"(a), "w"(b), "w"(c)
4889 : /* No clobbers */);
4890 return result;
4893 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4894 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4896 uint16x8_t result;
4897 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4898 : "=w"(result)
4899 : "0"(a), "w"(b), "w"(c)
4900 : /* No clobbers */);
4901 return result;
4904 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4905 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4907 uint32x4_t result;
4908 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4909 : "=w"(result)
4910 : "0"(a), "w"(b), "w"(c)
4911 : /* No clobbers */);
4912 return result;
4915 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4916 vabd_f32 (float32x2_t a, float32x2_t b)
4918 float32x2_t result;
4919 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4920 : "=w"(result)
4921 : "w"(a), "w"(b)
4922 : /* No clobbers */);
4923 return result;
4926 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4927 vabd_s8 (int8x8_t a, int8x8_t b)
4929 int8x8_t result;
4930 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4931 : "=w"(result)
4932 : "w"(a), "w"(b)
4933 : /* No clobbers */);
4934 return result;
4937 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4938 vabd_s16 (int16x4_t a, int16x4_t b)
4940 int16x4_t result;
4941 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4942 : "=w"(result)
4943 : "w"(a), "w"(b)
4944 : /* No clobbers */);
4945 return result;
4948 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4949 vabd_s32 (int32x2_t a, int32x2_t b)
4951 int32x2_t result;
4952 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4953 : "=w"(result)
4954 : "w"(a), "w"(b)
4955 : /* No clobbers */);
4956 return result;
4959 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4960 vabd_u8 (uint8x8_t a, uint8x8_t b)
4962 uint8x8_t result;
4963 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4964 : "=w"(result)
4965 : "w"(a), "w"(b)
4966 : /* No clobbers */);
4967 return result;
4970 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4971 vabd_u16 (uint16x4_t a, uint16x4_t b)
4973 uint16x4_t result;
4974 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4975 : "=w"(result)
4976 : "w"(a), "w"(b)
4977 : /* No clobbers */);
4978 return result;
4981 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4982 vabd_u32 (uint32x2_t a, uint32x2_t b)
4984 uint32x2_t result;
4985 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4986 : "=w"(result)
4987 : "w"(a), "w"(b)
4988 : /* No clobbers */);
4989 return result;
4992 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4993 vabdd_f64 (float64_t a, float64_t b)
4995 float64_t result;
4996 __asm__ ("fabd %d0, %d1, %d2"
4997 : "=w"(result)
4998 : "w"(a), "w"(b)
4999 : /* No clobbers */);
5000 return result;
5003 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5004 vabdl_high_s8 (int8x16_t a, int8x16_t b)
5006 int16x8_t result;
5007 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
5008 : "=w"(result)
5009 : "w"(a), "w"(b)
5010 : /* No clobbers */);
5011 return result;
5014 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5015 vabdl_high_s16 (int16x8_t a, int16x8_t b)
5017 int32x4_t result;
5018 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
5019 : "=w"(result)
5020 : "w"(a), "w"(b)
5021 : /* No clobbers */);
5022 return result;
5025 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5026 vabdl_high_s32 (int32x4_t a, int32x4_t b)
5028 int64x2_t result;
5029 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
5030 : "=w"(result)
5031 : "w"(a), "w"(b)
5032 : /* No clobbers */);
5033 return result;
5036 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5037 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
5039 uint16x8_t result;
5040 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
5041 : "=w"(result)
5042 : "w"(a), "w"(b)
5043 : /* No clobbers */);
5044 return result;
5047 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5048 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
5050 uint32x4_t result;
5051 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
5052 : "=w"(result)
5053 : "w"(a), "w"(b)
5054 : /* No clobbers */);
5055 return result;
5058 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5059 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
5061 uint64x2_t result;
5062 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
5063 : "=w"(result)
5064 : "w"(a), "w"(b)
5065 : /* No clobbers */);
5066 return result;
5069 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5070 vabdl_s8 (int8x8_t a, int8x8_t b)
5072 int16x8_t result;
5073 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
5074 : "=w"(result)
5075 : "w"(a), "w"(b)
5076 : /* No clobbers */);
5077 return result;
5080 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5081 vabdl_s16 (int16x4_t a, int16x4_t b)
5083 int32x4_t result;
5084 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
5085 : "=w"(result)
5086 : "w"(a), "w"(b)
5087 : /* No clobbers */);
5088 return result;
5091 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5092 vabdl_s32 (int32x2_t a, int32x2_t b)
5094 int64x2_t result;
5095 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
5096 : "=w"(result)
5097 : "w"(a), "w"(b)
5098 : /* No clobbers */);
5099 return result;
5102 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5103 vabdl_u8 (uint8x8_t a, uint8x8_t b)
5105 uint16x8_t result;
5106 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
5107 : "=w"(result)
5108 : "w"(a), "w"(b)
5109 : /* No clobbers */);
5110 return result;
5113 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5114 vabdl_u16 (uint16x4_t a, uint16x4_t b)
5116 uint32x4_t result;
5117 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
5118 : "=w"(result)
5119 : "w"(a), "w"(b)
5120 : /* No clobbers */);
5121 return result;
5124 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5125 vabdl_u32 (uint32x2_t a, uint32x2_t b)
5127 uint64x2_t result;
5128 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
5129 : "=w"(result)
5130 : "w"(a), "w"(b)
5131 : /* No clobbers */);
5132 return result;
5135 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5136 vabdq_f32 (float32x4_t a, float32x4_t b)
5138 float32x4_t result;
5139 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
5140 : "=w"(result)
5141 : "w"(a), "w"(b)
5142 : /* No clobbers */);
5143 return result;
5146 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5147 vabdq_f64 (float64x2_t a, float64x2_t b)
5149 float64x2_t result;
5150 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
5151 : "=w"(result)
5152 : "w"(a), "w"(b)
5153 : /* No clobbers */);
5154 return result;
5157 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5158 vabdq_s8 (int8x16_t a, int8x16_t b)
5160 int8x16_t result;
5161 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
5162 : "=w"(result)
5163 : "w"(a), "w"(b)
5164 : /* No clobbers */);
5165 return result;
5168 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5169 vabdq_s16 (int16x8_t a, int16x8_t b)
5171 int16x8_t result;
5172 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
5173 : "=w"(result)
5174 : "w"(a), "w"(b)
5175 : /* No clobbers */);
5176 return result;
5179 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5180 vabdq_s32 (int32x4_t a, int32x4_t b)
5182 int32x4_t result;
5183 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
5184 : "=w"(result)
5185 : "w"(a), "w"(b)
5186 : /* No clobbers */);
5187 return result;
5190 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5191 vabdq_u8 (uint8x16_t a, uint8x16_t b)
5193 uint8x16_t result;
5194 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
5195 : "=w"(result)
5196 : "w"(a), "w"(b)
5197 : /* No clobbers */);
5198 return result;
5201 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5202 vabdq_u16 (uint16x8_t a, uint16x8_t b)
5204 uint16x8_t result;
5205 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
5206 : "=w"(result)
5207 : "w"(a), "w"(b)
5208 : /* No clobbers */);
5209 return result;
5212 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5213 vabdq_u32 (uint32x4_t a, uint32x4_t b)
5215 uint32x4_t result;
5216 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
5217 : "=w"(result)
5218 : "w"(a), "w"(b)
5219 : /* No clobbers */);
5220 return result;
5223 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5224 vabds_f32 (float32_t a, float32_t b)
5226 float32_t result;
5227 __asm__ ("fabd %s0, %s1, %s2"
5228 : "=w"(result)
5229 : "w"(a), "w"(b)
5230 : /* No clobbers */);
5231 return result;
5234 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5235 vaddlv_s8 (int8x8_t a)
5237 int16_t result;
5238 __asm__ ("saddlv %h0,%1.8b"
5239 : "=w"(result)
5240 : "w"(a)
5241 : /* No clobbers */);
5242 return result;
5245 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5246 vaddlv_s16 (int16x4_t a)
5248 int32_t result;
5249 __asm__ ("saddlv %s0,%1.4h"
5250 : "=w"(result)
5251 : "w"(a)
5252 : /* No clobbers */);
5253 return result;
5256 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5257 vaddlv_u8 (uint8x8_t a)
5259 uint16_t result;
5260 __asm__ ("uaddlv %h0,%1.8b"
5261 : "=w"(result)
5262 : "w"(a)
5263 : /* No clobbers */);
5264 return result;
5267 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5268 vaddlv_u16 (uint16x4_t a)
5270 uint32_t result;
5271 __asm__ ("uaddlv %s0,%1.4h"
5272 : "=w"(result)
5273 : "w"(a)
5274 : /* No clobbers */);
5275 return result;
5278 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5279 vaddlvq_s8 (int8x16_t a)
5281 int16_t result;
5282 __asm__ ("saddlv %h0,%1.16b"
5283 : "=w"(result)
5284 : "w"(a)
5285 : /* No clobbers */);
5286 return result;
5289 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5290 vaddlvq_s16 (int16x8_t a)
5292 int32_t result;
5293 __asm__ ("saddlv %s0,%1.8h"
5294 : "=w"(result)
5295 : "w"(a)
5296 : /* No clobbers */);
5297 return result;
5300 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5301 vaddlvq_s32 (int32x4_t a)
5303 int64_t result;
5304 __asm__ ("saddlv %d0,%1.4s"
5305 : "=w"(result)
5306 : "w"(a)
5307 : /* No clobbers */);
5308 return result;
5311 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5312 vaddlvq_u8 (uint8x16_t a)
5314 uint16_t result;
5315 __asm__ ("uaddlv %h0,%1.16b"
5316 : "=w"(result)
5317 : "w"(a)
5318 : /* No clobbers */);
5319 return result;
5322 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5323 vaddlvq_u16 (uint16x8_t a)
5325 uint32_t result;
5326 __asm__ ("uaddlv %s0,%1.8h"
5327 : "=w"(result)
5328 : "w"(a)
5329 : /* No clobbers */);
5330 return result;
5333 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5334 vaddlvq_u32 (uint32x4_t a)
5336 uint64_t result;
5337 __asm__ ("uaddlv %d0,%1.4s"
5338 : "=w"(result)
5339 : "w"(a)
5340 : /* No clobbers */);
5341 return result;
5344 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5345 vcls_s8 (int8x8_t a)
5347 int8x8_t result;
5348 __asm__ ("cls %0.8b,%1.8b"
5349 : "=w"(result)
5350 : "w"(a)
5351 : /* No clobbers */);
5352 return result;
5355 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5356 vcls_s16 (int16x4_t a)
5358 int16x4_t result;
5359 __asm__ ("cls %0.4h,%1.4h"
5360 : "=w"(result)
5361 : "w"(a)
5362 : /* No clobbers */);
5363 return result;
5366 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5367 vcls_s32 (int32x2_t a)
5369 int32x2_t result;
5370 __asm__ ("cls %0.2s,%1.2s"
5371 : "=w"(result)
5372 : "w"(a)
5373 : /* No clobbers */);
5374 return result;
5377 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5378 vclsq_s8 (int8x16_t a)
5380 int8x16_t result;
5381 __asm__ ("cls %0.16b,%1.16b"
5382 : "=w"(result)
5383 : "w"(a)
5384 : /* No clobbers */);
5385 return result;
5388 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5389 vclsq_s16 (int16x8_t a)
5391 int16x8_t result;
5392 __asm__ ("cls %0.8h,%1.8h"
5393 : "=w"(result)
5394 : "w"(a)
5395 : /* No clobbers */);
5396 return result;
5399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5400 vclsq_s32 (int32x4_t a)
5402 int32x4_t result;
5403 __asm__ ("cls %0.4s,%1.4s"
5404 : "=w"(result)
5405 : "w"(a)
5406 : /* No clobbers */);
5407 return result;
5410 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5411 vcnt_p8 (poly8x8_t a)
5413 poly8x8_t result;
5414 __asm__ ("cnt %0.8b,%1.8b"
5415 : "=w"(result)
5416 : "w"(a)
5417 : /* No clobbers */);
5418 return result;
5421 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5422 vcnt_s8 (int8x8_t a)
5424 int8x8_t result;
5425 __asm__ ("cnt %0.8b,%1.8b"
5426 : "=w"(result)
5427 : "w"(a)
5428 : /* No clobbers */);
5429 return result;
5432 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5433 vcnt_u8 (uint8x8_t a)
5435 uint8x8_t result;
5436 __asm__ ("cnt %0.8b,%1.8b"
5437 : "=w"(result)
5438 : "w"(a)
5439 : /* No clobbers */);
5440 return result;
5443 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5444 vcntq_p8 (poly8x16_t a)
5446 poly8x16_t result;
5447 __asm__ ("cnt %0.16b,%1.16b"
5448 : "=w"(result)
5449 : "w"(a)
5450 : /* No clobbers */);
5451 return result;
5454 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5455 vcntq_s8 (int8x16_t a)
5457 int8x16_t result;
5458 __asm__ ("cnt %0.16b,%1.16b"
5459 : "=w"(result)
5460 : "w"(a)
5461 : /* No clobbers */);
5462 return result;
5465 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5466 vcntq_u8 (uint8x16_t a)
5468 uint8x16_t result;
5469 __asm__ ("cnt %0.16b,%1.16b"
5470 : "=w"(result)
5471 : "w"(a)
5472 : /* No clobbers */);
5473 return result;
/* vcopyq_lane_*: insert lane d of vector c into lane b of vector a
   (INS).  Implemented as statement-expression macros so that the lane
   numbers b and d can be passed to the "i" (immediate) constraints.  */

#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vcvt_f16_f32 not supported */

/* vcvt_f32_f16 not supported */

/* vcvt_high_f16_f32 not supported */

/* vcvt_high_f32_f16 not supported */
5640 static float32x2_t vdup_n_f32 (float32_t);
5642 #define vcvt_n_f32_s32(a, b) \
5643 __extension__ \
5644 ({ \
5645 int32x2_t a_ = (a); \
5646 float32x2_t result; \
5647 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5648 : "=w"(result) \
5649 : "w"(a_), "i"(b) \
5650 : /* No clobbers */); \
5651 result; \
5654 #define vcvt_n_f32_u32(a, b) \
5655 __extension__ \
5656 ({ \
5657 uint32x2_t a_ = (a); \
5658 float32x2_t result; \
5659 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5660 : "=w"(result) \
5661 : "w"(a_), "i"(b) \
5662 : /* No clobbers */); \
5663 result; \
5666 #define vcvt_n_s32_f32(a, b) \
5667 __extension__ \
5668 ({ \
5669 float32x2_t a_ = (a); \
5670 int32x2_t result; \
5671 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
5672 : "=w"(result) \
5673 : "w"(a_), "i"(b) \
5674 : /* No clobbers */); \
5675 result; \
5678 #define vcvt_n_u32_f32(a, b) \
5679 __extension__ \
5680 ({ \
5681 float32x2_t a_ = (a); \
5682 uint32x2_t result; \
5683 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
5684 : "=w"(result) \
5685 : "w"(a_), "i"(b) \
5686 : /* No clobbers */); \
5687 result; \
5690 #define vcvtd_n_f64_s64(a, b) \
5691 __extension__ \
5692 ({ \
5693 int64_t a_ = (a); \
5694 float64_t result; \
5695 __asm__ ("scvtf %d0,%d1,%2" \
5696 : "=w"(result) \
5697 : "w"(a_), "i"(b) \
5698 : /* No clobbers */); \
5699 result; \
5702 #define vcvtd_n_f64_u64(a, b) \
5703 __extension__ \
5704 ({ \
5705 uint64_t a_ = (a); \
5706 float64_t result; \
5707 __asm__ ("ucvtf %d0,%d1,%2" \
5708 : "=w"(result) \
5709 : "w"(a_), "i"(b) \
5710 : /* No clobbers */); \
5711 result; \
5714 #define vcvtd_n_s64_f64(a, b) \
5715 __extension__ \
5716 ({ \
5717 float64_t a_ = (a); \
5718 int64_t result; \
5719 __asm__ ("fcvtzs %d0,%d1,%2" \
5720 : "=w"(result) \
5721 : "w"(a_), "i"(b) \
5722 : /* No clobbers */); \
5723 result; \
5726 #define vcvtd_n_u64_f64(a, b) \
5727 __extension__ \
5728 ({ \
5729 float64_t a_ = (a); \
5730 uint64_t result; \
5731 __asm__ ("fcvtzu %d0,%d1,%2" \
5732 : "=w"(result) \
5733 : "w"(a_), "i"(b) \
5734 : /* No clobbers */); \
5735 result; \
5738 #define vcvtq_n_f32_s32(a, b) \
5739 __extension__ \
5740 ({ \
5741 int32x4_t a_ = (a); \
5742 float32x4_t result; \
5743 __asm__ ("scvtf %0.4s, %1.4s, #%2" \
5744 : "=w"(result) \
5745 : "w"(a_), "i"(b) \
5746 : /* No clobbers */); \
5747 result; \
5750 #define vcvtq_n_f32_u32(a, b) \
5751 __extension__ \
5752 ({ \
5753 uint32x4_t a_ = (a); \
5754 float32x4_t result; \
5755 __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
5756 : "=w"(result) \
5757 : "w"(a_), "i"(b) \
5758 : /* No clobbers */); \
5759 result; \
5762 #define vcvtq_n_f64_s64(a, b) \
5763 __extension__ \
5764 ({ \
5765 int64x2_t a_ = (a); \
5766 float64x2_t result; \
5767 __asm__ ("scvtf %0.2d, %1.2d, #%2" \
5768 : "=w"(result) \
5769 : "w"(a_), "i"(b) \
5770 : /* No clobbers */); \
5771 result; \
5774 #define vcvtq_n_f64_u64(a, b) \
5775 __extension__ \
5776 ({ \
5777 uint64x2_t a_ = (a); \
5778 float64x2_t result; \
5779 __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
5780 : "=w"(result) \
5781 : "w"(a_), "i"(b) \
5782 : /* No clobbers */); \
5783 result; \
5786 #define vcvtq_n_s32_f32(a, b) \
5787 __extension__ \
5788 ({ \
5789 float32x4_t a_ = (a); \
5790 int32x4_t result; \
5791 __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
5792 : "=w"(result) \
5793 : "w"(a_), "i"(b) \
5794 : /* No clobbers */); \
5795 result; \
5798 #define vcvtq_n_s64_f64(a, b) \
5799 __extension__ \
5800 ({ \
5801 float64x2_t a_ = (a); \
5802 int64x2_t result; \
5803 __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
5804 : "=w"(result) \
5805 : "w"(a_), "i"(b) \
5806 : /* No clobbers */); \
5807 result; \
5810 #define vcvtq_n_u32_f32(a, b) \
5811 __extension__ \
5812 ({ \
5813 float32x4_t a_ = (a); \
5814 uint32x4_t result; \
5815 __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
5816 : "=w"(result) \
5817 : "w"(a_), "i"(b) \
5818 : /* No clobbers */); \
5819 result; \
5822 #define vcvtq_n_u64_f64(a, b) \
5823 __extension__ \
5824 ({ \
5825 float64x2_t a_ = (a); \
5826 uint64x2_t result; \
5827 __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
5828 : "=w"(result) \
5829 : "w"(a_), "i"(b) \
5830 : /* No clobbers */); \
5831 result; \
5834 #define vcvts_n_f32_s32(a, b) \
5835 __extension__ \
5836 ({ \
5837 int32_t a_ = (a); \
5838 float32_t result; \
5839 __asm__ ("scvtf %s0,%s1,%2" \
5840 : "=w"(result) \
5841 : "w"(a_), "i"(b) \
5842 : /* No clobbers */); \
5843 result; \
5846 #define vcvts_n_f32_u32(a, b) \
5847 __extension__ \
5848 ({ \
5849 uint32_t a_ = (a); \
5850 float32_t result; \
5851 __asm__ ("ucvtf %s0,%s1,%2" \
5852 : "=w"(result) \
5853 : "w"(a_), "i"(b) \
5854 : /* No clobbers */); \
5855 result; \
5858 #define vcvts_n_s32_f32(a, b) \
5859 __extension__ \
5860 ({ \
5861 float32_t a_ = (a); \
5862 int32_t result; \
5863 __asm__ ("fcvtzs %s0,%s1,%2" \
5864 : "=w"(result) \
5865 : "w"(a_), "i"(b) \
5866 : /* No clobbers */); \
5867 result; \
5870 #define vcvts_n_u32_f32(a, b) \
5871 __extension__ \
5872 ({ \
5873 float32_t a_ = (a); \
5874 uint32_t result; \
5875 __asm__ ("fcvtzu %s0,%s1,%2" \
5876 : "=w"(result) \
5877 : "w"(a_), "i"(b) \
5878 : /* No clobbers */); \
5879 result; \
5882 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5883 vcvtx_f32_f64 (float64x2_t a)
5885 float32x2_t result;
5886 __asm__ ("fcvtxn %0.2s,%1.2d"
5887 : "=w"(result)
5888 : "w"(a)
5889 : /* No clobbers */);
5890 return result;
5893 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5894 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5896 float32x4_t result;
5897 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5898 : "=w"(result)
5899 : "w" (b), "0"(a)
5900 : /* No clobbers */);
5901 return result;
5904 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5905 vcvtxd_f32_f64 (float64_t a)
5907 float32_t result;
5908 __asm__ ("fcvtxn %s0,%d1"
5909 : "=w"(result)
5910 : "w"(a)
5911 : /* No clobbers */);
5912 return result;
5915 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5916 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5918 float32x2_t result;
5919 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5920 : "=w"(result)
5921 : "0"(a), "w"(b), "w"(c)
5922 : /* No clobbers */);
5923 return result;
5926 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5927 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5929 float32x4_t result;
5930 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5931 : "=w"(result)
5932 : "0"(a), "w"(b), "w"(c)
5933 : /* No clobbers */);
5934 return result;
5937 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5938 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5940 float64x2_t result;
5941 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
5942 : "=w"(result)
5943 : "0"(a), "w"(b), "w"(c)
5944 : /* No clobbers */);
5945 return result;
5948 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5949 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5951 float32x2_t result;
5952 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
5953 : "=w"(result)
5954 : "0"(a), "w"(b), "w"(c)
5955 : /* No clobbers */);
5956 return result;
5959 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5960 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
5962 float32x4_t result;
5963 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
5964 : "=w"(result)
5965 : "0"(a), "w"(b), "w"(c)
5966 : /* No clobbers */);
5967 return result;
5970 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5971 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
5973 float64x2_t result;
5974 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
5975 : "=w"(result)
5976 : "0"(a), "w"(b), "w"(c)
5977 : /* No clobbers */);
5978 return result;
5981 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5982 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5984 float32x2_t result;
5985 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
5986 : "=w"(result)
5987 : "0"(a), "w"(b), "w"(c)
5988 : /* No clobbers */);
5989 return result;
5992 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5993 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5995 float32x4_t result;
5996 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
5997 : "=w"(result)
5998 : "0"(a), "w"(b), "w"(c)
5999 : /* No clobbers */);
6000 return result;
6003 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6004 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6006 float64x2_t result;
6007 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
6008 : "=w"(result)
6009 : "0"(a), "w"(b), "w"(c)
6010 : /* No clobbers */);
6011 return result;
6014 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6015 vhsub_s8 (int8x8_t a, int8x8_t b)
6017 int8x8_t result;
6018 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
6019 : "=w"(result)
6020 : "w"(a), "w"(b)
6021 : /* No clobbers */);
6022 return result;
6025 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6026 vhsub_s16 (int16x4_t a, int16x4_t b)
6028 int16x4_t result;
6029 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
6030 : "=w"(result)
6031 : "w"(a), "w"(b)
6032 : /* No clobbers */);
6033 return result;
6036 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6037 vhsub_s32 (int32x2_t a, int32x2_t b)
6039 int32x2_t result;
6040 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
6041 : "=w"(result)
6042 : "w"(a), "w"(b)
6043 : /* No clobbers */);
6044 return result;
6047 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6048 vhsub_u8 (uint8x8_t a, uint8x8_t b)
6050 uint8x8_t result;
6051 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
6052 : "=w"(result)
6053 : "w"(a), "w"(b)
6054 : /* No clobbers */);
6055 return result;
6058 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6059 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6061 uint16x4_t result;
6062 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6063 : "=w"(result)
6064 : "w"(a), "w"(b)
6065 : /* No clobbers */);
6066 return result;
6069 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6070 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6072 uint32x2_t result;
6073 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6074 : "=w"(result)
6075 : "w"(a), "w"(b)
6076 : /* No clobbers */);
6077 return result;
6080 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6081 vhsubq_s8 (int8x16_t a, int8x16_t b)
6083 int8x16_t result;
6084 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6085 : "=w"(result)
6086 : "w"(a), "w"(b)
6087 : /* No clobbers */);
6088 return result;
6091 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6092 vhsubq_s16 (int16x8_t a, int16x8_t b)
6094 int16x8_t result;
6095 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6096 : "=w"(result)
6097 : "w"(a), "w"(b)
6098 : /* No clobbers */);
6099 return result;
6102 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6103 vhsubq_s32 (int32x4_t a, int32x4_t b)
6105 int32x4_t result;
6106 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6107 : "=w"(result)
6108 : "w"(a), "w"(b)
6109 : /* No clobbers */);
6110 return result;
6113 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6114 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6116 uint8x16_t result;
6117 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6118 : "=w"(result)
6119 : "w"(a), "w"(b)
6120 : /* No clobbers */);
6121 return result;
6124 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6125 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6127 uint16x8_t result;
6128 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6129 : "=w"(result)
6130 : "w"(a), "w"(b)
6131 : /* No clobbers */);
6132 return result;
6135 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6136 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6138 uint32x4_t result;
6139 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6140 : "=w"(result)
6141 : "w"(a), "w"(b)
6142 : /* No clobbers */);
6143 return result;
6146 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6147 vld1_dup_f32 (const float32_t * a)
6149 float32x2_t result;
6150 __asm__ ("ld1r {%0.2s}, %1"
6151 : "=w"(result)
6152 : "Utv"(*a)
6153 : /* No clobbers */);
6154 return result;
6157 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6158 vld1_dup_f64 (const float64_t * a)
6160 float64x1_t result;
6161 __asm__ ("ld1r {%0.1d}, %1"
6162 : "=w"(result)
6163 : "Utv"(*a)
6164 : /* No clobbers */);
6165 return result;
6168 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6169 vld1_dup_p8 (const poly8_t * a)
6171 poly8x8_t result;
6172 __asm__ ("ld1r {%0.8b}, %1"
6173 : "=w"(result)
6174 : "Utv"(*a)
6175 : /* No clobbers */);
6176 return result;
6179 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6180 vld1_dup_p16 (const poly16_t * a)
6182 poly16x4_t result;
6183 __asm__ ("ld1r {%0.4h}, %1"
6184 : "=w"(result)
6185 : "Utv"(*a)
6186 : /* No clobbers */);
6187 return result;
6190 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6191 vld1_dup_s8 (const int8_t * a)
6193 int8x8_t result;
6194 __asm__ ("ld1r {%0.8b}, %1"
6195 : "=w"(result)
6196 : "Utv"(*a)
6197 : /* No clobbers */);
6198 return result;
6201 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6202 vld1_dup_s16 (const int16_t * a)
6204 int16x4_t result;
6205 __asm__ ("ld1r {%0.4h}, %1"
6206 : "=w"(result)
6207 : "Utv"(*a)
6208 : /* No clobbers */);
6209 return result;
6212 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6213 vld1_dup_s32 (const int32_t * a)
6215 int32x2_t result;
6216 __asm__ ("ld1r {%0.2s}, %1"
6217 : "=w"(result)
6218 : "Utv"(*a)
6219 : /* No clobbers */);
6220 return result;
6223 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6224 vld1_dup_s64 (const int64_t * a)
6226 int64x1_t result;
6227 __asm__ ("ld1r {%0.1d}, %1"
6228 : "=w"(result)
6229 : "Utv"(*a)
6230 : /* No clobbers */);
6231 return result;
6234 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6235 vld1_dup_u8 (const uint8_t * a)
6237 uint8x8_t result;
6238 __asm__ ("ld1r {%0.8b}, %1"
6239 : "=w"(result)
6240 : "Utv"(*a)
6241 : /* No clobbers */);
6242 return result;
6245 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6246 vld1_dup_u16 (const uint16_t * a)
6248 uint16x4_t result;
6249 __asm__ ("ld1r {%0.4h}, %1"
6250 : "=w"(result)
6251 : "Utv"(*a)
6252 : /* No clobbers */);
6253 return result;
6256 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6257 vld1_dup_u32 (const uint32_t * a)
6259 uint32x2_t result;
6260 __asm__ ("ld1r {%0.2s}, %1"
6261 : "=w"(result)
6262 : "Utv"(*a)
6263 : /* No clobbers */);
6264 return result;
6267 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6268 vld1_dup_u64 (const uint64_t * a)
6270 uint64x1_t result;
6271 __asm__ ("ld1r {%0.1d}, %1"
6272 : "=w"(result)
6273 : "Utv"(*a)
6274 : /* No clobbers */);
6275 return result;
/* vld1_lane_*: load a single element from *A into lane C of vector B,
   leaving the other lanes unchanged (LD1 to one lane).  Implemented as
   macros because the lane number must be an "i" (immediate) operand.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t res_;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t res_;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t res_;                                                  \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t res_;                                                 \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t res_;                                                   \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t res_;                                                  \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t res_;                                                  \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t res_;                                                  \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t res_;                                                  \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t res_;                                                 \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t res_;                                                 \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t res_;                                                 \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })
6434 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6435 vld1q_dup_f32 (const float32_t * a)
6437 float32x4_t result;
6438 __asm__ ("ld1r {%0.4s}, %1"
6439 : "=w"(result)
6440 : "Utv"(*a)
6441 : /* No clobbers */);
6442 return result;
6445 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6446 vld1q_dup_f64 (const float64_t * a)
6448 float64x2_t result;
6449 __asm__ ("ld1r {%0.2d}, %1"
6450 : "=w"(result)
6451 : "Utv"(*a)
6452 : /* No clobbers */);
6453 return result;
6456 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6457 vld1q_dup_p8 (const poly8_t * a)
6459 poly8x16_t result;
6460 __asm__ ("ld1r {%0.16b}, %1"
6461 : "=w"(result)
6462 : "Utv"(*a)
6463 : /* No clobbers */);
6464 return result;
6467 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6468 vld1q_dup_p16 (const poly16_t * a)
6470 poly16x8_t result;
6471 __asm__ ("ld1r {%0.8h}, %1"
6472 : "=w"(result)
6473 : "Utv"(*a)
6474 : /* No clobbers */);
6475 return result;
6478 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6479 vld1q_dup_s8 (const int8_t * a)
6481 int8x16_t result;
6482 __asm__ ("ld1r {%0.16b}, %1"
6483 : "=w"(result)
6484 : "Utv"(*a)
6485 : /* No clobbers */);
6486 return result;
6489 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6490 vld1q_dup_s16 (const int16_t * a)
6492 int16x8_t result;
6493 __asm__ ("ld1r {%0.8h}, %1"
6494 : "=w"(result)
6495 : "Utv"(*a)
6496 : /* No clobbers */);
6497 return result;
6500 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6501 vld1q_dup_s32 (const int32_t * a)
6503 int32x4_t result;
6504 __asm__ ("ld1r {%0.4s}, %1"
6505 : "=w"(result)
6506 : "Utv"(*a)
6507 : /* No clobbers */);
6508 return result;
6511 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6512 vld1q_dup_s64 (const int64_t * a)
6514 int64x2_t result;
6515 __asm__ ("ld1r {%0.2d}, %1"
6516 : "=w"(result)
6517 : "Utv"(*a)
6518 : /* No clobbers */);
6519 return result;
6522 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6523 vld1q_dup_u8 (const uint8_t * a)
6525 uint8x16_t result;
6526 __asm__ ("ld1r {%0.16b}, %1"
6527 : "=w"(result)
6528 : "Utv"(*a)
6529 : /* No clobbers */);
6530 return result;
6533 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6534 vld1q_dup_u16 (const uint16_t * a)
6536 uint16x8_t result;
6537 __asm__ ("ld1r {%0.8h}, %1"
6538 : "=w"(result)
6539 : "Utv"(*a)
6540 : /* No clobbers */);
6541 return result;
6544 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6545 vld1q_dup_u32 (const uint32_t * a)
6547 uint32x4_t result;
6548 __asm__ ("ld1r {%0.4s}, %1"
6549 : "=w"(result)
6550 : "Utv"(*a)
6551 : /* No clobbers */);
6552 return result;
6555 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6556 vld1q_dup_u64 (const uint64_t * a)
6558 uint64x2_t result;
6559 __asm__ ("ld1r {%0.2d}, %1"
6560 : "=w"(result)
6561 : "Utv"(*a)
6562 : /* No clobbers */);
6563 return result;
/* vld1q_lane_*: load one element from *A into lane C of 128-bit vector
   B, preserving the remaining lanes (LD1 to one lane).  Macros because
   the lane index must be an immediate ("i") operand.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t res_;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t res_;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t res_;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t res_;                                                 \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t res_;                                                  \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t res_;                                                  \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t res_;                                                  \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t res_;                                                  \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t res_;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t res_;                                                 \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t res_;                                                 \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t res_;                                                 \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(res_)                                            \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })
6722 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6723 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6725 float32x2_t result;
6726 float32x2_t t1;
6727 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6728 : "=w"(result), "=w"(t1)
6729 : "0"(a), "w"(b), "w"(c)
6730 : /* No clobbers */);
6731 return result;
6734 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6735 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6737 int16x4_t result;
6738 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6739 : "=w"(result)
6740 : "0"(a), "w"(b), "x"(c)
6741 : /* No clobbers */);
6742 return result;
6745 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6746 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6748 int32x2_t result;
6749 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6750 : "=w"(result)
6751 : "0"(a), "w"(b), "w"(c)
6752 : /* No clobbers */);
6753 return result;
6756 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6757 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6759 uint16x4_t result;
6760 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6761 : "=w"(result)
6762 : "0"(a), "w"(b), "x"(c)
6763 : /* No clobbers */);
6764 return result;
6767 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6768 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6770 uint32x2_t result;
6771 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6772 : "=w"(result)
6773 : "0"(a), "w"(b), "w"(c)
6774 : /* No clobbers */);
6775 return result;
6778 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6779 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6781 int8x8_t result;
6782 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6783 : "=w"(result)
6784 : "0"(a), "w"(b), "w"(c)
6785 : /* No clobbers */);
6786 return result;
6789 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6790 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6792 int16x4_t result;
6793 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6794 : "=w"(result)
6795 : "0"(a), "w"(b), "w"(c)
6796 : /* No clobbers */);
6797 return result;
6800 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6801 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6803 int32x2_t result;
6804 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6805 : "=w"(result)
6806 : "0"(a), "w"(b), "w"(c)
6807 : /* No clobbers */);
6808 return result;
6811 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6812 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6814 uint8x8_t result;
6815 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6816 : "=w"(result)
6817 : "0"(a), "w"(b), "w"(c)
6818 : /* No clobbers */);
6819 return result;
6822 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6823 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6825 uint16x4_t result;
6826 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6827 : "=w"(result)
6828 : "0"(a), "w"(b), "w"(c)
6829 : /* No clobbers */);
6830 return result;
6833 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6834 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6836 uint32x2_t result;
6837 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6838 : "=w"(result)
6839 : "0"(a), "w"(b), "w"(c)
6840 : /* No clobbers */);
6841 return result;
/* vmlal_high_lane(q)_*: widening multiply-accumulate of the HIGH half
   of B by lane D of C (SMLAL2/UMLAL2 by element).  Macros because the
   lane index must be an immediate.  The "x" constraint restricts
   16-bit-element multipliers to v0-v15 as the h-indexed form requires.  */

#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t res_;                                                  \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t res_;                                                  \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t res_;                                                 \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t res_;                                                 \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t res_;                                                  \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t res_;                                                  \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t res_;                                                 \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t res_;                                                 \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })
6956 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6957 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6959 int32x4_t result;
6960 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6961 : "=w"(result)
6962 : "0"(a), "w"(b), "x"(c)
6963 : /* No clobbers */);
6964 return result;
6967 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6968 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6970 int64x2_t result;
6971 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6972 : "=w"(result)
6973 : "0"(a), "w"(b), "w"(c)
6974 : /* No clobbers */);
6975 return result;
6978 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6979 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6981 uint32x4_t result;
6982 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6983 : "=w"(result)
6984 : "0"(a), "w"(b), "x"(c)
6985 : /* No clobbers */);
6986 return result;
6989 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6990 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6992 uint64x2_t result;
6993 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6994 : "=w"(result)
6995 : "0"(a), "w"(b), "w"(c)
6996 : /* No clobbers */);
6997 return result;
7000 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7001 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7003 int16x8_t result;
7004 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
7005 : "=w"(result)
7006 : "0"(a), "w"(b), "w"(c)
7007 : /* No clobbers */);
7008 return result;
7011 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7012 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7014 int32x4_t result;
7015 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
7016 : "=w"(result)
7017 : "0"(a), "w"(b), "w"(c)
7018 : /* No clobbers */);
7019 return result;
7022 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7023 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7025 int64x2_t result;
7026 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
7027 : "=w"(result)
7028 : "0"(a), "w"(b), "w"(c)
7029 : /* No clobbers */);
7030 return result;
7033 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7034 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7036 uint16x8_t result;
7037 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
7038 : "=w"(result)
7039 : "0"(a), "w"(b), "w"(c)
7040 : /* No clobbers */);
7041 return result;
7044 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7045 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7047 uint32x4_t result;
7048 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
7049 : "=w"(result)
7050 : "0"(a), "w"(b), "w"(c)
7051 : /* No clobbers */);
7052 return result;
7055 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7056 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7058 uint64x2_t result;
7059 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
7060 : "=w"(result)
7061 : "0"(a), "w"(b), "w"(c)
7062 : /* No clobbers */);
7063 return result;
/* vmlal_lane(q)_*: widening multiply-accumulate of B by lane D of C
   (SMLAL/UMLAL by element).  Macros because the lane index must be an
   immediate; "x" restricts h-indexed multipliers to v0-v15.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t res_;                                                  \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t res_;                                                  \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t res_;                                                 \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t res_;                                                 \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t res_;                                                  \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t res_;                                                  \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t res_;                                                 \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t res_;                                                 \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(res_)                                            \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       res_;                                                            \
     })
7178 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7179 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7181 int32x4_t result;
7182 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7183 : "=w"(result)
7184 : "0"(a), "w"(b), "x"(c)
7185 : /* No clobbers */);
7186 return result;
7189 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7190 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7192 int64x2_t result;
7193 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7194 : "=w"(result)
7195 : "0"(a), "w"(b), "w"(c)
7196 : /* No clobbers */);
7197 return result;
7200 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7201 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7203 uint32x4_t result;
7204 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7205 : "=w"(result)
7206 : "0"(a), "w"(b), "x"(c)
7207 : /* No clobbers */);
7208 return result;
7211 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7212 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7214 uint64x2_t result;
7215 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7216 : "=w"(result)
7217 : "0"(a), "w"(b), "w"(c)
7218 : /* No clobbers */);
7219 return result;
7222 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7223 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7225 int16x8_t result;
7226 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7227 : "=w"(result)
7228 : "0"(a), "w"(b), "w"(c)
7229 : /* No clobbers */);
7230 return result;
7233 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7234 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7236 int32x4_t result;
7237 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7238 : "=w"(result)
7239 : "0"(a), "w"(b), "w"(c)
7240 : /* No clobbers */);
7241 return result;
7244 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7245 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7247 int64x2_t result;
7248 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7249 : "=w"(result)
7250 : "0"(a), "w"(b), "w"(c)
7251 : /* No clobbers */);
7252 return result;
7255 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7256 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7258 uint16x8_t result;
7259 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7260 : "=w"(result)
7261 : "0"(a), "w"(b), "w"(c)
7262 : /* No clobbers */);
7263 return result;
7266 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7267 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7269 uint32x4_t result;
7270 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7271 : "=w"(result)
7272 : "0"(a), "w"(b), "w"(c)
7273 : /* No clobbers */);
7274 return result;
7277 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7278 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7280 uint64x2_t result;
7281 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7282 : "=w"(result)
7283 : "0"(a), "w"(b), "w"(c)
7284 : /* No clobbers */);
7285 return result;
7288 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7289 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7291 float32x4_t result;
7292 float32x4_t t1;
7293 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7294 : "=w"(result), "=w"(t1)
7295 : "0"(a), "w"(b), "w"(c)
7296 : /* No clobbers */);
7297 return result;
7300 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7301 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7303 int16x8_t result;
7304 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7305 : "=w"(result)
7306 : "0"(a), "w"(b), "x"(c)
7307 : /* No clobbers */);
7308 return result;
7311 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7312 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7314 int32x4_t result;
7315 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7316 : "=w"(result)
7317 : "0"(a), "w"(b), "w"(c)
7318 : /* No clobbers */);
7319 return result;
7322 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7323 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7325 uint16x8_t result;
7326 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7327 : "=w"(result)
7328 : "0"(a), "w"(b), "x"(c)
7329 : /* No clobbers */);
7330 return result;
7333 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7334 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7336 uint32x4_t result;
7337 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7338 : "=w"(result)
7339 : "0"(a), "w"(b), "w"(c)
7340 : /* No clobbers */);
7341 return result;
7344 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7345 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7347 int8x16_t result;
7348 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7349 : "=w"(result)
7350 : "0"(a), "w"(b), "w"(c)
7351 : /* No clobbers */);
7352 return result;
7355 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7356 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7358 int16x8_t result;
7359 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7360 : "=w"(result)
7361 : "0"(a), "w"(b), "w"(c)
7362 : /* No clobbers */);
7363 return result;
7366 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7367 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7369 int32x4_t result;
7370 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7371 : "=w"(result)
7372 : "0"(a), "w"(b), "w"(c)
7373 : /* No clobbers */);
7374 return result;
7377 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7378 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7380 uint8x16_t result;
7381 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7382 : "=w"(result)
7383 : "0"(a), "w"(b), "w"(c)
7384 : /* No clobbers */);
7385 return result;
7388 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7389 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7391 uint16x8_t result;
7392 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7393 : "=w"(result)
7394 : "0"(a), "w"(b), "w"(c)
7395 : /* No clobbers */);
7396 return result;
7399 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7400 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7402 uint32x4_t result;
7403 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7404 : "=w"(result)
7405 : "0"(a), "w"(b), "w"(c)
7406 : /* No clobbers */);
7407 return result;
7410 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7411 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7413 float32x2_t result;
7414 float32x2_t t1;
7415 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7416 : "=w"(result), "=w"(t1)
7417 : "0"(a), "w"(b), "w"(c)
7418 : /* No clobbers */);
7419 return result;
7422 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7423 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7425 int16x4_t result;
7426 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7427 : "=w"(result)
7428 : "0"(a), "w"(b), "x"(c)
7429 : /* No clobbers */);
7430 return result;
7433 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7434 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7436 int32x2_t result;
7437 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7438 : "=w"(result)
7439 : "0"(a), "w"(b), "w"(c)
7440 : /* No clobbers */);
7441 return result;
7444 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7445 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7447 uint16x4_t result;
7448 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7449 : "=w"(result)
7450 : "0"(a), "w"(b), "x"(c)
7451 : /* No clobbers */);
7452 return result;
7455 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7456 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7458 uint32x2_t result;
7459 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7460 : "=w"(result)
7461 : "0"(a), "w"(b), "w"(c)
7462 : /* No clobbers */);
7463 return result;
7466 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7467 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7469 int8x8_t result;
7470 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7471 : "=w"(result)
7472 : "0"(a), "w"(b), "w"(c)
7473 : /* No clobbers */);
7474 return result;
7477 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7478 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7480 int16x4_t result;
7481 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7482 : "=w"(result)
7483 : "0"(a), "w"(b), "w"(c)
7484 : /* No clobbers */);
7485 return result;
7488 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7489 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7491 int32x2_t result;
7492 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7493 : "=w"(result)
7494 : "0"(a), "w"(b), "w"(c)
7495 : /* No clobbers */);
7496 return result;
7499 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7500 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7502 uint8x8_t result;
7503 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7504 : "=w"(result)
7505 : "0"(a), "w"(b), "w"(c)
7506 : /* No clobbers */);
7507 return result;
7510 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7511 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7513 uint16x4_t result;
7514 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7515 : "=w"(result)
7516 : "0"(a), "w"(b), "w"(c)
7517 : /* No clobbers */);
7518 return result;
7521 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7522 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7524 uint32x2_t result;
7525 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7526 : "=w"(result)
7527 : "0"(a), "w"(b), "w"(c)
7528 : /* No clobbers */);
7529 return result;
/* vmlsl_high_lane(q): widening multiply-subtract of the HIGH half of B by a
   selected lane of C.  These are macros (statement expressions) because the
   lane number D must be an immediate ("i" constraint).  The _laneq variants
   differ only in taking a 128-bit C.  "x" on 16-bit C operands restricts
   the lane-indexed register to v0-v15.  */

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7644 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7645 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7647 int32x4_t result;
7648 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7649 : "=w"(result)
7650 : "0"(a), "w"(b), "x"(c)
7651 : /* No clobbers */);
7652 return result;
7655 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7656 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7658 int64x2_t result;
7659 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7660 : "=w"(result)
7661 : "0"(a), "w"(b), "w"(c)
7662 : /* No clobbers */);
7663 return result;
7666 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7667 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7669 uint32x4_t result;
7670 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7671 : "=w"(result)
7672 : "0"(a), "w"(b), "x"(c)
7673 : /* No clobbers */);
7674 return result;
7677 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7678 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7680 uint64x2_t result;
7681 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7682 : "=w"(result)
7683 : "0"(a), "w"(b), "w"(c)
7684 : /* No clobbers */);
7685 return result;
7688 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7689 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7691 int16x8_t result;
7692 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7693 : "=w"(result)
7694 : "0"(a), "w"(b), "w"(c)
7695 : /* No clobbers */);
7696 return result;
7699 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7700 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7702 int32x4_t result;
7703 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7704 : "=w"(result)
7705 : "0"(a), "w"(b), "w"(c)
7706 : /* No clobbers */);
7707 return result;
7710 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7711 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7713 int64x2_t result;
7714 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7715 : "=w"(result)
7716 : "0"(a), "w"(b), "w"(c)
7717 : /* No clobbers */);
7718 return result;
7721 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7722 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7724 uint16x8_t result;
7725 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7726 : "=w"(result)
7727 : "0"(a), "w"(b), "w"(c)
7728 : /* No clobbers */);
7729 return result;
7732 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7733 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7735 uint32x4_t result;
7736 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7737 : "=w"(result)
7738 : "0"(a), "w"(b), "w"(c)
7739 : /* No clobbers */);
7740 return result;
7743 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7744 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7746 uint64x2_t result;
7747 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7748 : "=w"(result)
7749 : "0"(a), "w"(b), "w"(c)
7750 : /* No clobbers */);
7751 return result;
/* vmlsl_lane(q): widening multiply-subtract of B by a selected lane of C.
   Macros (statement expressions) because the lane number D must be an
   immediate.  The _laneq variants take a 128-bit C.  */

#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7866 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7867 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7869 int32x4_t result;
7870 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7871 : "=w"(result)
7872 : "0"(a), "w"(b), "x"(c)
7873 : /* No clobbers */);
7874 return result;
7877 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7878 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7880 int64x2_t result;
7881 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7882 : "=w"(result)
7883 : "0"(a), "w"(b), "w"(c)
7884 : /* No clobbers */);
7885 return result;
7888 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7889 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7891 uint32x4_t result;
7892 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7893 : "=w"(result)
7894 : "0"(a), "w"(b), "x"(c)
7895 : /* No clobbers */);
7896 return result;
7899 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7900 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7902 uint64x2_t result;
7903 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7904 : "=w"(result)
7905 : "0"(a), "w"(b), "w"(c)
7906 : /* No clobbers */);
7907 return result;
7910 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7911 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7913 int16x8_t result;
7914 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7915 : "=w"(result)
7916 : "0"(a), "w"(b), "w"(c)
7917 : /* No clobbers */);
7918 return result;
7921 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7922 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7924 int32x4_t result;
7925 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7926 : "=w"(result)
7927 : "0"(a), "w"(b), "w"(c)
7928 : /* No clobbers */);
7929 return result;
7932 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7933 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7935 int64x2_t result;
7936 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7937 : "=w"(result)
7938 : "0"(a), "w"(b), "w"(c)
7939 : /* No clobbers */);
7940 return result;
7943 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7944 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7946 uint16x8_t result;
7947 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7948 : "=w"(result)
7949 : "0"(a), "w"(b), "w"(c)
7950 : /* No clobbers */);
7951 return result;
7954 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7955 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7957 uint32x4_t result;
7958 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7959 : "=w"(result)
7960 : "0"(a), "w"(b), "w"(c)
7961 : /* No clobbers */);
7962 return result;
7965 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7966 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7968 uint64x2_t result;
7969 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7970 : "=w"(result)
7971 : "0"(a), "w"(b), "w"(c)
7972 : /* No clobbers */);
7973 return result;
7976 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7977 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7979 float32x4_t result;
7980 float32x4_t t1;
7981 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7982 : "=w"(result), "=w"(t1)
7983 : "0"(a), "w"(b), "w"(c)
7984 : /* No clobbers */);
7985 return result;
7988 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7989 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7991 int16x8_t result;
7992 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7993 : "=w"(result)
7994 : "0"(a), "w"(b), "x"(c)
7995 : /* No clobbers */);
7996 return result;
7999 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8000 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
8002 int32x4_t result;
8003 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8004 : "=w"(result)
8005 : "0"(a), "w"(b), "w"(c)
8006 : /* No clobbers */);
8007 return result;
8010 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8011 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
8013 uint16x8_t result;
8014 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
8015 : "=w"(result)
8016 : "0"(a), "w"(b), "x"(c)
8017 : /* No clobbers */);
8018 return result;
8021 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8022 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
8024 uint32x4_t result;
8025 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8026 : "=w"(result)
8027 : "0"(a), "w"(b), "w"(c)
8028 : /* No clobbers */);
8029 return result;
8032 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8033 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8035 int8x16_t result;
8036 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8037 : "=w"(result)
8038 : "0"(a), "w"(b), "w"(c)
8039 : /* No clobbers */);
8040 return result;
8043 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8044 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8046 int16x8_t result;
8047 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8048 : "=w"(result)
8049 : "0"(a), "w"(b), "w"(c)
8050 : /* No clobbers */);
8051 return result;
8054 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8055 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8057 int32x4_t result;
8058 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8059 : "=w"(result)
8060 : "0"(a), "w"(b), "w"(c)
8061 : /* No clobbers */);
8062 return result;
8065 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8066 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8068 uint8x16_t result;
8069 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8070 : "=w"(result)
8071 : "0"(a), "w"(b), "w"(c)
8072 : /* No clobbers */);
8073 return result;
8076 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8077 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8079 uint16x8_t result;
8080 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8081 : "=w"(result)
8082 : "0"(a), "w"(b), "w"(c)
8083 : /* No clobbers */);
8084 return result;
8087 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8088 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8090 uint32x4_t result;
8091 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8092 : "=w"(result)
8093 : "0"(a), "w"(b), "w"(c)
8094 : /* No clobbers */);
8095 return result;
8098 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8099 vmovl_high_s8 (int8x16_t a)
8101 int16x8_t result;
8102 __asm__ ("sshll2 %0.8h,%1.16b,#0"
8103 : "=w"(result)
8104 : "w"(a)
8105 : /* No clobbers */);
8106 return result;
8109 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8110 vmovl_high_s16 (int16x8_t a)
8112 int32x4_t result;
8113 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8114 : "=w"(result)
8115 : "w"(a)
8116 : /* No clobbers */);
8117 return result;
8120 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8121 vmovl_high_s32 (int32x4_t a)
8123 int64x2_t result;
8124 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8125 : "=w"(result)
8126 : "w"(a)
8127 : /* No clobbers */);
8128 return result;
8131 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8132 vmovl_high_u8 (uint8x16_t a)
8134 uint16x8_t result;
8135 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8136 : "=w"(result)
8137 : "w"(a)
8138 : /* No clobbers */);
8139 return result;
8142 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8143 vmovl_high_u16 (uint16x8_t a)
8145 uint32x4_t result;
8146 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8147 : "=w"(result)
8148 : "w"(a)
8149 : /* No clobbers */);
8150 return result;
8153 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8154 vmovl_high_u32 (uint32x4_t a)
8156 uint64x2_t result;
8157 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8158 : "=w"(result)
8159 : "w"(a)
8160 : /* No clobbers */);
8161 return result;
8164 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8165 vmovl_s8 (int8x8_t a)
8167 int16x8_t result;
8168 __asm__ ("sshll %0.8h,%1.8b,#0"
8169 : "=w"(result)
8170 : "w"(a)
8171 : /* No clobbers */);
8172 return result;
8175 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8176 vmovl_s16 (int16x4_t a)
8178 int32x4_t result;
8179 __asm__ ("sshll %0.4s,%1.4h,#0"
8180 : "=w"(result)
8181 : "w"(a)
8182 : /* No clobbers */);
8183 return result;
8186 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8187 vmovl_s32 (int32x2_t a)
8189 int64x2_t result;
8190 __asm__ ("sshll %0.2d,%1.2s,#0"
8191 : "=w"(result)
8192 : "w"(a)
8193 : /* No clobbers */);
8194 return result;
8197 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8198 vmovl_u8 (uint8x8_t a)
8200 uint16x8_t result;
8201 __asm__ ("ushll %0.8h,%1.8b,#0"
8202 : "=w"(result)
8203 : "w"(a)
8204 : /* No clobbers */);
8205 return result;
8208 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8209 vmovl_u16 (uint16x4_t a)
8211 uint32x4_t result;
8212 __asm__ ("ushll %0.4s,%1.4h,#0"
8213 : "=w"(result)
8214 : "w"(a)
8215 : /* No clobbers */);
8216 return result;
8219 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8220 vmovl_u32 (uint32x2_t a)
8222 uint64x2_t result;
8223 __asm__ ("ushll %0.2d,%1.2s,#0"
8224 : "=w"(result)
8225 : "w"(a)
8226 : /* No clobbers */);
8227 return result;
8230 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8231 vmovn_high_s16 (int8x8_t a, int16x8_t b)
8233 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8234 __asm__ ("xtn2 %0.16b,%1.8h"
8235 : "+w"(result)
8236 : "w"(b)
8237 : /* No clobbers */);
8238 return result;
8241 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8242 vmovn_high_s32 (int16x4_t a, int32x4_t b)
8244 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8245 __asm__ ("xtn2 %0.8h,%1.4s"
8246 : "+w"(result)
8247 : "w"(b)
8248 : /* No clobbers */);
8249 return result;
8252 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8253 vmovn_high_s64 (int32x2_t a, int64x2_t b)
8255 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8256 __asm__ ("xtn2 %0.4s,%1.2d"
8257 : "+w"(result)
8258 : "w"(b)
8259 : /* No clobbers */);
8260 return result;
8263 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8264 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8266 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8267 __asm__ ("xtn2 %0.16b,%1.8h"
8268 : "+w"(result)
8269 : "w"(b)
8270 : /* No clobbers */);
8271 return result;
8274 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8275 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8277 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8278 __asm__ ("xtn2 %0.8h,%1.4s"
8279 : "+w"(result)
8280 : "w"(b)
8281 : /* No clobbers */);
8282 return result;
8285 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8286 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8288 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8289 __asm__ ("xtn2 %0.4s,%1.2d"
8290 : "+w"(result)
8291 : "w"(b)
8292 : /* No clobbers */);
8293 return result;
8296 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8297 vmovn_s16 (int16x8_t a)
8299 int8x8_t result;
8300 __asm__ ("xtn %0.8b,%1.8h"
8301 : "=w"(result)
8302 : "w"(a)
8303 : /* No clobbers */);
8304 return result;
8307 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8308 vmovn_s32 (int32x4_t a)
8310 int16x4_t result;
8311 __asm__ ("xtn %0.4h,%1.4s"
8312 : "=w"(result)
8313 : "w"(a)
8314 : /* No clobbers */);
8315 return result;
8318 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8319 vmovn_s64 (int64x2_t a)
8321 int32x2_t result;
8322 __asm__ ("xtn %0.2s,%1.2d"
8323 : "=w"(result)
8324 : "w"(a)
8325 : /* No clobbers */);
8326 return result;
8329 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8330 vmovn_u16 (uint16x8_t a)
8332 uint8x8_t result;
8333 __asm__ ("xtn %0.8b,%1.8h"
8334 : "=w"(result)
8335 : "w"(a)
8336 : /* No clobbers */);
8337 return result;
8340 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8341 vmovn_u32 (uint32x4_t a)
8343 uint16x4_t result;
8344 __asm__ ("xtn %0.4h,%1.4s"
8345 : "=w"(result)
8346 : "w"(a)
8347 : /* No clobbers */);
8348 return result;
8351 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8352 vmovn_u64 (uint64x2_t a)
8354 uint32x2_t result;
8355 __asm__ ("xtn %0.2s,%1.2d"
8356 : "=w"(result)
8357 : "w"(a)
8358 : /* No clobbers */);
8359 return result;
8362 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8363 vmul_n_f32 (float32x2_t a, float32_t b)
8365 float32x2_t result;
8366 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
8367 : "=w"(result)
8368 : "w"(a), "w"(b)
8369 : /* No clobbers */);
8370 return result;
8373 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8374 vmul_n_s16 (int16x4_t a, int16_t b)
8376 int16x4_t result;
8377 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8378 : "=w"(result)
8379 : "w"(a), "x"(b)
8380 : /* No clobbers */);
8381 return result;
8384 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8385 vmul_n_s32 (int32x2_t a, int32_t b)
8387 int32x2_t result;
8388 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8389 : "=w"(result)
8390 : "w"(a), "w"(b)
8391 : /* No clobbers */);
8392 return result;
8395 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8396 vmul_n_u16 (uint16x4_t a, uint16_t b)
8398 uint16x4_t result;
8399 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8400 : "=w"(result)
8401 : "w"(a), "x"(b)
8402 : /* No clobbers */);
8403 return result;
8406 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8407 vmul_n_u32 (uint32x2_t a, uint32_t b)
8409 uint32x2_t result;
8410 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8411 : "=w"(result)
8412 : "w"(a), "w"(b)
8413 : /* No clobbers */);
8414 return result;
/* vmull_high_lane[q]_{s16,s32,u16,u32}: widening multiply of the HIGH
   halves of A against a single selected lane C of B (SMULL2/UMULL2).
   Implemented as statement-expression macros because the lane number C
   must be an assembly-time immediate ("i" constraint).
   NOTE: the 16-bit-element variants use the "x" register constraint for
   B because indexed multiplies with .h elements can only address SIMD
   registers V0-V15; the .s variants may use any register ("w").  */
8417 #define vmull_high_lane_s16(a, b, c) \
8418 __extension__ \
8419 ({ \
8420 int16x4_t b_ = (b); \
8421 int16x8_t a_ = (a); \
8422 int32x4_t result; \
8423 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8424 : "=w"(result) \
8425 : "w"(a_), "x"(b_), "i"(c) \
8426 : /* No clobbers */); \
8427 result; \
8430 #define vmull_high_lane_s32(a, b, c) \
8431 __extension__ \
8432 ({ \
8433 int32x2_t b_ = (b); \
8434 int32x4_t a_ = (a); \
8435 int64x2_t result; \
8436 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8437 : "=w"(result) \
8438 : "w"(a_), "w"(b_), "i"(c) \
8439 : /* No clobbers */); \
8440 result; \
8443 #define vmull_high_lane_u16(a, b, c) \
8444 __extension__ \
8445 ({ \
8446 uint16x4_t b_ = (b); \
8447 uint16x8_t a_ = (a); \
8448 uint32x4_t result; \
8449 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8450 : "=w"(result) \
8451 : "w"(a_), "x"(b_), "i"(c) \
8452 : /* No clobbers */); \
8453 result; \
8456 #define vmull_high_lane_u32(a, b, c) \
8457 __extension__ \
8458 ({ \
8459 uint32x2_t b_ = (b); \
8460 uint32x4_t a_ = (a); \
8461 uint64x2_t result; \
8462 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8463 : "=w"(result) \
8464 : "w"(a_), "w"(b_), "i"(c) \
8465 : /* No clobbers */); \
8466 result; \
8469 #define vmull_high_laneq_s16(a, b, c) \
8470 __extension__ \
8471 ({ \
8472 int16x8_t b_ = (b); \
8473 int16x8_t a_ = (a); \
8474 int32x4_t result; \
8475 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8476 : "=w"(result) \
8477 : "w"(a_), "x"(b_), "i"(c) \
8478 : /* No clobbers */); \
8479 result; \
8482 #define vmull_high_laneq_s32(a, b, c) \
8483 __extension__ \
8484 ({ \
8485 int32x4_t b_ = (b); \
8486 int32x4_t a_ = (a); \
8487 int64x2_t result; \
8488 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8489 : "=w"(result) \
8490 : "w"(a_), "w"(b_), "i"(c) \
8491 : /* No clobbers */); \
8492 result; \
8495 #define vmull_high_laneq_u16(a, b, c) \
8496 __extension__ \
8497 ({ \
8498 uint16x8_t b_ = (b); \
8499 uint16x8_t a_ = (a); \
8500 uint32x4_t result; \
8501 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8502 : "=w"(result) \
8503 : "w"(a_), "x"(b_), "i"(c) \
8504 : /* No clobbers */); \
8505 result; \
8508 #define vmull_high_laneq_u32(a, b, c) \
8509 __extension__ \
8510 ({ \
8511 uint32x4_t b_ = (b); \
8512 uint32x4_t a_ = (a); \
8513 uint64x2_t result; \
8514 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8515 : "=w"(result) \
8516 : "w"(a_), "w"(b_), "i"(c) \
8517 : /* No clobbers */); \
8518 result; \
/* vmull_high_n_* (scalar broadcast, "%2.X[0]") and vmull_high_*
   (full-vector) widening multiplies of the UPPER halves of the 128-bit
   sources (SMULL2/UMULL2/PMULL2).  The 16-bit scalar variants use the
   "x" constraint: indexed .h multiplies can only encode V0-V15.  */
8521 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8522 vmull_high_n_s16 (int16x8_t a, int16_t b)
8524 int32x4_t result;
8525 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
8526 : "=w"(result)
8527 : "w"(a), "x"(b)
8528 : /* No clobbers */);
8529 return result;
8532 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8533 vmull_high_n_s32 (int32x4_t a, int32_t b)
8535 int64x2_t result;
8536 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
8537 : "=w"(result)
8538 : "w"(a), "w"(b)
8539 : /* No clobbers */);
8540 return result;
8543 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8544 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
8546 uint32x4_t result;
8547 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
8548 : "=w"(result)
8549 : "w"(a), "x"(b)
8550 : /* No clobbers */);
8551 return result;
8554 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8555 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
8557 uint64x2_t result;
8558 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
8559 : "=w"(result)
8560 : "w"(a), "w"(b)
8561 : /* No clobbers */);
8562 return result;
/* Polynomial widening multiply of the upper 8 bytes.  */
8565 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8566 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
8568 poly16x8_t result;
8569 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
8570 : "=w"(result)
8571 : "w"(a), "w"(b)
8572 : /* No clobbers */);
8573 return result;
8576 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8577 vmull_high_s8 (int8x16_t a, int8x16_t b)
8579 int16x8_t result;
8580 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
8581 : "=w"(result)
8582 : "w"(a), "w"(b)
8583 : /* No clobbers */);
8584 return result;
8587 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8588 vmull_high_s16 (int16x8_t a, int16x8_t b)
8590 int32x4_t result;
8591 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
8592 : "=w"(result)
8593 : "w"(a), "w"(b)
8594 : /* No clobbers */);
8595 return result;
8598 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8599 vmull_high_s32 (int32x4_t a, int32x4_t b)
8601 int64x2_t result;
8602 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
8603 : "=w"(result)
8604 : "w"(a), "w"(b)
8605 : /* No clobbers */);
8606 return result;
8609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8610 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
8612 uint16x8_t result;
8613 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
8614 : "=w"(result)
8615 : "w"(a), "w"(b)
8616 : /* No clobbers */);
8617 return result;
8620 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8621 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
8623 uint32x4_t result;
8624 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
8625 : "=w"(result)
8626 : "w"(a), "w"(b)
8627 : /* No clobbers */);
8628 return result;
8631 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8632 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
8634 uint64x2_t result;
8635 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
8636 : "=w"(result)
8637 : "w"(a), "w"(b)
8638 : /* No clobbers */);
8639 return result;
/* vmull_lane[q]_{s16,s32,u16,u32}: widening multiply of the 64-bit
   vector A against lane C of B (SMULL/UMULL, indexed form).  _lane
   selects from a 64-bit B, _laneq from a 128-bit B.  Macros because C
   must be an immediate; "x" on the .h variants restricts B to V0-V15
   as required by the indexed-multiply encoding.  */
8642 #define vmull_lane_s16(a, b, c) \
8643 __extension__ \
8644 ({ \
8645 int16x4_t b_ = (b); \
8646 int16x4_t a_ = (a); \
8647 int32x4_t result; \
8648 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
8649 : "=w"(result) \
8650 : "w"(a_), "x"(b_), "i"(c) \
8651 : /* No clobbers */); \
8652 result; \
8655 #define vmull_lane_s32(a, b, c) \
8656 __extension__ \
8657 ({ \
8658 int32x2_t b_ = (b); \
8659 int32x2_t a_ = (a); \
8660 int64x2_t result; \
8661 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
8662 : "=w"(result) \
8663 : "w"(a_), "w"(b_), "i"(c) \
8664 : /* No clobbers */); \
8665 result; \
8668 #define vmull_lane_u16(a, b, c) \
8669 __extension__ \
8670 ({ \
8671 uint16x4_t b_ = (b); \
8672 uint16x4_t a_ = (a); \
8673 uint32x4_t result; \
8674 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
8675 : "=w"(result) \
8676 : "w"(a_), "x"(b_), "i"(c) \
8677 : /* No clobbers */); \
8678 result; \
8681 #define vmull_lane_u32(a, b, c) \
8682 __extension__ \
8683 ({ \
8684 uint32x2_t b_ = (b); \
8685 uint32x2_t a_ = (a); \
8686 uint64x2_t result; \
8687 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8688 : "=w"(result) \
8689 : "w"(a_), "w"(b_), "i"(c) \
8690 : /* No clobbers */); \
8691 result; \
8694 #define vmull_laneq_s16(a, b, c) \
8695 __extension__ \
8696 ({ \
8697 int16x8_t b_ = (b); \
8698 int16x4_t a_ = (a); \
8699 int32x4_t result; \
8700 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
8701 : "=w"(result) \
8702 : "w"(a_), "x"(b_), "i"(c) \
8703 : /* No clobbers */); \
8704 result; \
8707 #define vmull_laneq_s32(a, b, c) \
8708 __extension__ \
8709 ({ \
8710 int32x4_t b_ = (b); \
8711 int32x2_t a_ = (a); \
8712 int64x2_t result; \
8713 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
8714 : "=w"(result) \
8715 : "w"(a_), "w"(b_), "i"(c) \
8716 : /* No clobbers */); \
8717 result; \
8720 #define vmull_laneq_u16(a, b, c) \
8721 __extension__ \
8722 ({ \
8723 uint16x8_t b_ = (b); \
8724 uint16x4_t a_ = (a); \
8725 uint32x4_t result; \
8726 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
8727 : "=w"(result) \
8728 : "x"(b_) ? 0 : 0, "w"(a_), "x"(b_), "i"(c) \
8729 : /* No clobbers */); \
8730 result; \
8733 #define vmull_laneq_u32(a, b, c) \
8734 __extension__ \
8735 ({ \
8736 uint32x4_t b_ = (b); \
8737 uint32x2_t a_ = (a); \
8738 uint64x2_t result; \
8739 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8740 : "=w"(result) \
8741 : "w"(a_), "w"(b_), "i"(c) \
8742 : /* No clobbers */); \
8743 result; \
/* vmull_n_* (scalar broadcast via "%2.X[0]") and vmull_* (full-vector)
   widening multiplies of 64-bit vectors (SMULL/UMULL/PMULL).  The
   16-bit scalar variants need "x" (V0-V15) for the indexed form.  */
8746 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8747 vmull_n_s16 (int16x4_t a, int16_t b)
8749 int32x4_t result;
8750 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
8751 : "=w"(result)
8752 : "w"(a), "x"(b)
8753 : /* No clobbers */);
8754 return result;
8757 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8758 vmull_n_s32 (int32x2_t a, int32_t b)
8760 int64x2_t result;
8761 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
8762 : "=w"(result)
8763 : "w"(a), "w"(b)
8764 : /* No clobbers */);
8765 return result;
8768 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8769 vmull_n_u16 (uint16x4_t a, uint16_t b)
8771 uint32x4_t result;
8772 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
8773 : "=w"(result)
8774 : "w"(a), "x"(b)
8775 : /* No clobbers */);
8776 return result;
8779 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8780 vmull_n_u32 (uint32x2_t a, uint32_t b)
8782 uint64x2_t result;
8783 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8784 : "=w"(result)
8785 : "w"(a), "w"(b)
8786 : /* No clobbers */);
8787 return result;
/* Polynomial widening multiply (carry-less).  */
8790 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8791 vmull_p8 (poly8x8_t a, poly8x8_t b)
8793 poly16x8_t result;
8794 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8795 : "=w"(result)
8796 : "w"(a), "w"(b)
8797 : /* No clobbers */);
8798 return result;
8801 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8802 vmull_s8 (int8x8_t a, int8x8_t b)
8804 int16x8_t result;
8805 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8806 : "=w"(result)
8807 : "w"(a), "w"(b)
8808 : /* No clobbers */);
8809 return result;
8812 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8813 vmull_s16 (int16x4_t a, int16x4_t b)
8815 int32x4_t result;
8816 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8817 : "=w"(result)
8818 : "w"(a), "w"(b)
8819 : /* No clobbers */);
8820 return result;
8823 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8824 vmull_s32 (int32x2_t a, int32x2_t b)
8826 int64x2_t result;
8827 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8828 : "=w"(result)
8829 : "w"(a), "w"(b)
8830 : /* No clobbers */);
8831 return result;
8834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8835 vmull_u8 (uint8x8_t a, uint8x8_t b)
8837 uint16x8_t result;
8838 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8839 : "=w"(result)
8840 : "w"(a), "w"(b)
8841 : /* No clobbers */);
8842 return result;
8845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8846 vmull_u16 (uint16x4_t a, uint16x4_t b)
8848 uint32x4_t result;
8849 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8850 : "=w"(result)
8851 : "w"(a), "w"(b)
8852 : /* No clobbers */);
8853 return result;
8856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8857 vmull_u32 (uint32x2_t a, uint32x2_t b)
8859 uint64x2_t result;
8860 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8861 : "=w"(result)
8862 : "w"(a), "w"(b)
8863 : /* No clobbers */);
8864 return result;
/* vmulq_n_*: multiply each lane of the 128-bit vector A by scalar B,
   broadcast via the indexed-element form ("%2.X[0]").  Integer .h
   variants use "x" (V0-V15 restriction of indexed .h multiplies).  */
8867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8868 vmulq_n_f32 (float32x4_t a, float32_t b)
8870 float32x4_t result;
8871 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8872 : "=w"(result)
8873 : "w"(a), "w"(b)
8874 : /* No clobbers */);
8875 return result;
8878 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8879 vmulq_n_f64 (float64x2_t a, float64_t b)
8881 float64x2_t result;
8882 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8883 : "=w"(result)
8884 : "w"(a), "w"(b)
8885 : /* No clobbers */);
8886 return result;
8889 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8890 vmulq_n_s16 (int16x8_t a, int16_t b)
8892 int16x8_t result;
8893 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8894 : "=w"(result)
8895 : "w"(a), "x"(b)
8896 : /* No clobbers */);
8897 return result;
8900 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8901 vmulq_n_s32 (int32x4_t a, int32_t b)
8903 int32x4_t result;
8904 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8905 : "=w"(result)
8906 : "w"(a), "w"(b)
8907 : /* No clobbers */);
8908 return result;
8911 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8912 vmulq_n_u16 (uint16x8_t a, uint16_t b)
8914 uint16x8_t result;
8915 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8916 : "=w"(result)
8917 : "w"(a), "x"(b)
8918 : /* No clobbers */);
8919 return result;
8922 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8923 vmulq_n_u32 (uint32x4_t a, uint32_t b)
8925 uint32x4_t result;
8926 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8927 : "=w"(result)
8928 : "w"(a), "w"(b)
8929 : /* No clobbers */);
8930 return result;
/* vmulx_f32: FMULX — like FMUL but 0 * infinity returns +/-2.0
   instead of NaN (used by reciprocal step sequences).  */
8933 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8934 vmulx_f32 (float32x2_t a, float32x2_t b)
8936 float32x2_t result;
8937 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
8938 : "=w"(result)
8939 : "w"(a), "w"(b)
8940 : /* No clobbers */);
8941 return result;
/* vmulx_lane_f32: FMULX of A by lane C of B.
   Per the ACLE specification the vector operand B of the _lane form is
   the 64-bit float32x2_t type (float32x4_t is the _laneq form); the
   previous definition wrongly declared it float32x4_t.  The lane
   number C must be an immediate, hence the "i" constraint and the
   statement-expression macro.  */
#define vmulx_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Scalar and 128-bit vector FMULX variants (0 * inf -> +/-2.0).  */
8957 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
8958 vmulxd_f64 (float64_t a, float64_t b)
8960 float64_t result;
8961 __asm__ ("fmulx %d0, %d1, %d2"
8962 : "=w"(result)
8963 : "w"(a), "w"(b)
8964 : /* No clobbers */);
8965 return result;
8968 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8969 vmulxq_f32 (float32x4_t a, float32x4_t b)
8971 float32x4_t result;
8972 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
8973 : "=w"(result)
8974 : "w"(a), "w"(b)
8975 : /* No clobbers */);
8976 return result;
8979 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8980 vmulxq_f64 (float64x2_t a, float64x2_t b)
8982 float64x2_t result;
8983 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
8984 : "=w"(result)
8985 : "w"(a), "w"(b)
8986 : /* No clobbers */);
8987 return result;
/* vmulxq_lane_f32: FMULX of the 128-bit A by lane C of B.
   Per ACLE the B operand of the _lane form is the 64-bit float32x2_t
   type (float32x4_t belongs to vmulxq_laneq_f32); the previous
   definition wrongly declared it float32x4_t.  */
#define vmulxq_lane_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmulxq_lane_f64: FMULX of the 128-bit A by lane C of B.
   Per ACLE the B operand of the _lane form is the 64-bit float64x1_t
   type (float64x2_t belongs to vmulxq_laneq_f64); the previous
   definition wrongly declared it float64x2_t.  C must be 0 for the
   one-lane vector.  */
#define vmulxq_lane_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmulxs_f32: scalar single-precision FMULX.  */
9016 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9017 vmulxs_f32 (float32_t a, float32_t b)
9019 float32_t result;
9020 __asm__ ("fmulx %s0, %s1, %s2"
9021 : "=w"(result)
9022 : "w"(a), "w"(b)
9023 : /* No clobbers */);
9024 return result;
/* vmvn[q]_*: bitwise NOT (MVN, alias of NOT).  The operation is a pure
   bit complement, so the .8b/.16b arrangement is used for every element
   type and signedness.  */
9027 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9028 vmvn_p8 (poly8x8_t a)
9030 poly8x8_t result;
9031 __asm__ ("mvn %0.8b,%1.8b"
9032 : "=w"(result)
9033 : "w"(a)
9034 : /* No clobbers */);
9035 return result;
9038 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9039 vmvn_s8 (int8x8_t a)
9041 int8x8_t result;
9042 __asm__ ("mvn %0.8b,%1.8b"
9043 : "=w"(result)
9044 : "w"(a)
9045 : /* No clobbers */);
9046 return result;
9049 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9050 vmvn_s16 (int16x4_t a)
9052 int16x4_t result;
9053 __asm__ ("mvn %0.8b,%1.8b"
9054 : "=w"(result)
9055 : "w"(a)
9056 : /* No clobbers */);
9057 return result;
9060 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9061 vmvn_s32 (int32x2_t a)
9063 int32x2_t result;
9064 __asm__ ("mvn %0.8b,%1.8b"
9065 : "=w"(result)
9066 : "w"(a)
9067 : /* No clobbers */);
9068 return result;
9071 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9072 vmvn_u8 (uint8x8_t a)
9074 uint8x8_t result;
9075 __asm__ ("mvn %0.8b,%1.8b"
9076 : "=w"(result)
9077 : "w"(a)
9078 : /* No clobbers */);
9079 return result;
9082 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9083 vmvn_u16 (uint16x4_t a)
9085 uint16x4_t result;
9086 __asm__ ("mvn %0.8b,%1.8b"
9087 : "=w"(result)
9088 : "w"(a)
9089 : /* No clobbers */);
9090 return result;
9093 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9094 vmvn_u32 (uint32x2_t a)
9096 uint32x2_t result;
9097 __asm__ ("mvn %0.8b,%1.8b"
9098 : "=w"(result)
9099 : "w"(a)
9100 : /* No clobbers */);
9101 return result;
9104 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9105 vmvnq_p8 (poly8x16_t a)
9107 poly8x16_t result;
9108 __asm__ ("mvn %0.16b,%1.16b"
9109 : "=w"(result)
9110 : "w"(a)
9111 : /* No clobbers */);
9112 return result;
9115 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9116 vmvnq_s8 (int8x16_t a)
9118 int8x16_t result;
9119 __asm__ ("mvn %0.16b,%1.16b"
9120 : "=w"(result)
9121 : "w"(a)
9122 : /* No clobbers */);
9123 return result;
9126 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9127 vmvnq_s16 (int16x8_t a)
9129 int16x8_t result;
9130 __asm__ ("mvn %0.16b,%1.16b"
9131 : "=w"(result)
9132 : "w"(a)
9133 : /* No clobbers */);
9134 return result;
9137 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9138 vmvnq_s32 (int32x4_t a)
9140 int32x4_t result;
9141 __asm__ ("mvn %0.16b,%1.16b"
9142 : "=w"(result)
9143 : "w"(a)
9144 : /* No clobbers */);
9145 return result;
9148 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9149 vmvnq_u8 (uint8x16_t a)
9151 uint8x16_t result;
9152 __asm__ ("mvn %0.16b,%1.16b"
9153 : "=w"(result)
9154 : "w"(a)
9155 : /* No clobbers */);
9156 return result;
9159 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9160 vmvnq_u16 (uint16x8_t a)
9162 uint16x8_t result;
9163 __asm__ ("mvn %0.16b,%1.16b"
9164 : "=w"(result)
9165 : "w"(a)
9166 : /* No clobbers */);
9167 return result;
9170 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9171 vmvnq_u32 (uint32x4_t a)
9173 uint32x4_t result;
9174 __asm__ ("mvn %0.16b,%1.16b"
9175 : "=w"(result)
9176 : "w"(a)
9177 : /* No clobbers */);
9178 return result;
/* vpadal[q]_*: signed/unsigned add-and-accumulate long pairwise
   (SADALP/UADALP): each pair of adjacent elements of B is widened,
   summed and accumulated into A.  The "0"(a) constraint ties the
   accumulator input to the output register, which is why %1 never
   appears in the template — the instruction reads and writes %0.  */
9182 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9183 vpadal_s8 (int16x4_t a, int8x8_t b)
9185 int16x4_t result;
9186 __asm__ ("sadalp %0.4h,%2.8b"
9187 : "=w"(result)
9188 : "0"(a), "w"(b)
9189 : /* No clobbers */);
9190 return result;
9193 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9194 vpadal_s16 (int32x2_t a, int16x4_t b)
9196 int32x2_t result;
9197 __asm__ ("sadalp %0.2s,%2.4h"
9198 : "=w"(result)
9199 : "0"(a), "w"(b)
9200 : /* No clobbers */);
9201 return result;
9204 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9205 vpadal_s32 (int64x1_t a, int32x2_t b)
9207 int64x1_t result;
9208 __asm__ ("sadalp %0.1d,%2.2s"
9209 : "=w"(result)
9210 : "0"(a), "w"(b)
9211 : /* No clobbers */);
9212 return result;
9215 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9216 vpadal_u8 (uint16x4_t a, uint8x8_t b)
9218 uint16x4_t result;
9219 __asm__ ("uadalp %0.4h,%2.8b"
9220 : "=w"(result)
9221 : "0"(a), "w"(b)
9222 : /* No clobbers */);
9223 return result;
9226 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9227 vpadal_u16 (uint32x2_t a, uint16x4_t b)
9229 uint32x2_t result;
9230 __asm__ ("uadalp %0.2s,%2.4h"
9231 : "=w"(result)
9232 : "0"(a), "w"(b)
9233 : /* No clobbers */);
9234 return result;
9237 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9238 vpadal_u32 (uint64x1_t a, uint32x2_t b)
9240 uint64x1_t result;
9241 __asm__ ("uadalp %0.1d,%2.2s"
9242 : "=w"(result)
9243 : "0"(a), "w"(b)
9244 : /* No clobbers */);
9245 return result;
9248 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9249 vpadalq_s8 (int16x8_t a, int8x16_t b)
9251 int16x8_t result;
9252 __asm__ ("sadalp %0.8h,%2.16b"
9253 : "=w"(result)
9254 : "0"(a), "w"(b)
9255 : /* No clobbers */);
9256 return result;
9259 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9260 vpadalq_s16 (int32x4_t a, int16x8_t b)
9262 int32x4_t result;
9263 __asm__ ("sadalp %0.4s,%2.8h"
9264 : "=w"(result)
9265 : "0"(a), "w"(b)
9266 : /* No clobbers */);
9267 return result;
9270 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9271 vpadalq_s32 (int64x2_t a, int32x4_t b)
9273 int64x2_t result;
9274 __asm__ ("sadalp %0.2d,%2.4s"
9275 : "=w"(result)
9276 : "0"(a), "w"(b)
9277 : /* No clobbers */);
9278 return result;
9281 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9282 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
9284 uint16x8_t result;
9285 __asm__ ("uadalp %0.8h,%2.16b"
9286 : "=w"(result)
9287 : "0"(a), "w"(b)
9288 : /* No clobbers */);
9289 return result;
9292 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9293 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
9295 uint32x4_t result;
9296 __asm__ ("uadalp %0.4s,%2.8h"
9297 : "=w"(result)
9298 : "0"(a), "w"(b)
9299 : /* No clobbers */);
9300 return result;
9303 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9304 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
9306 uint64x2_t result;
9307 __asm__ ("uadalp %0.2d,%2.4s"
9308 : "=w"(result)
9309 : "0"(a), "w"(b)
9310 : /* No clobbers */);
9311 return result;
/* vpadd_f32: pairwise add (FADDP) of adjacent lanes of A and B.  */
9314 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9315 vpadd_f32 (float32x2_t a, float32x2_t b)
9317 float32x2_t result;
9318 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
9319 : "=w"(result)
9320 : "w"(a), "w"(b)
9321 : /* No clobbers */);
9322 return result;
/* vpaddl[q]_*: pairwise add long (SADDLP/UADDLP): each pair of
   adjacent elements is widened to twice the width and summed, halving
   the lane count.  */
9325 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9326 vpaddl_s8 (int8x8_t a)
9328 int16x4_t result;
9329 __asm__ ("saddlp %0.4h,%1.8b"
9330 : "=w"(result)
9331 : "w"(a)
9332 : /* No clobbers */);
9333 return result;
9336 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9337 vpaddl_s16 (int16x4_t a)
9339 int32x2_t result;
9340 __asm__ ("saddlp %0.2s,%1.4h"
9341 : "=w"(result)
9342 : "w"(a)
9343 : /* No clobbers */);
9344 return result;
9347 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9348 vpaddl_s32 (int32x2_t a)
9350 int64x1_t result;
9351 __asm__ ("saddlp %0.1d,%1.2s"
9352 : "=w"(result)
9353 : "w"(a)
9354 : /* No clobbers */);
9355 return result;
9358 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9359 vpaddl_u8 (uint8x8_t a)
9361 uint16x4_t result;
9362 __asm__ ("uaddlp %0.4h,%1.8b"
9363 : "=w"(result)
9364 : "w"(a)
9365 : /* No clobbers */);
9366 return result;
9369 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9370 vpaddl_u16 (uint16x4_t a)
9372 uint32x2_t result;
9373 __asm__ ("uaddlp %0.2s,%1.4h"
9374 : "=w"(result)
9375 : "w"(a)
9376 : /* No clobbers */);
9377 return result;
9380 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9381 vpaddl_u32 (uint32x2_t a)
9383 uint64x1_t result;
9384 __asm__ ("uaddlp %0.1d,%1.2s"
9385 : "=w"(result)
9386 : "w"(a)
9387 : /* No clobbers */);
9388 return result;
9391 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9392 vpaddlq_s8 (int8x16_t a)
9394 int16x8_t result;
9395 __asm__ ("saddlp %0.8h,%1.16b"
9396 : "=w"(result)
9397 : "w"(a)
9398 : /* No clobbers */);
9399 return result;
9402 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9403 vpaddlq_s16 (int16x8_t a)
9405 int32x4_t result;
9406 __asm__ ("saddlp %0.4s,%1.8h"
9407 : "=w"(result)
9408 : "w"(a)
9409 : /* No clobbers */);
9410 return result;
9413 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9414 vpaddlq_s32 (int32x4_t a)
9416 int64x2_t result;
9417 __asm__ ("saddlp %0.2d,%1.4s"
9418 : "=w"(result)
9419 : "w"(a)
9420 : /* No clobbers */);
9421 return result;
9424 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9425 vpaddlq_u8 (uint8x16_t a)
9427 uint16x8_t result;
9428 __asm__ ("uaddlp %0.8h,%1.16b"
9429 : "=w"(result)
9430 : "w"(a)
9431 : /* No clobbers */);
9432 return result;
9435 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9436 vpaddlq_u16 (uint16x8_t a)
9438 uint32x4_t result;
9439 __asm__ ("uaddlp %0.4s,%1.8h"
9440 : "=w"(result)
9441 : "w"(a)
9442 : /* No clobbers */);
9443 return result;
9446 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9447 vpaddlq_u32 (uint32x4_t a)
9449 uint64x2_t result;
9450 __asm__ ("uaddlp %0.2d,%1.4s"
9451 : "=w"(result)
9452 : "w"(a)
9453 : /* No clobbers */);
9454 return result;
/* vpaddq_*: 128-bit pairwise add (FADDP/ADDP) of adjacent lanes of the
   concatenation of A and B.  Signed and unsigned integer forms use the
   same ADDP instruction since pairwise addition is sign-agnostic.  */
9457 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9458 vpaddq_f32 (float32x4_t a, float32x4_t b)
9460 float32x4_t result;
9461 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
9462 : "=w"(result)
9463 : "w"(a), "w"(b)
9464 : /* No clobbers */);
9465 return result;
9468 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9469 vpaddq_f64 (float64x2_t a, float64x2_t b)
9471 float64x2_t result;
9472 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
9473 : "=w"(result)
9474 : "w"(a), "w"(b)
9475 : /* No clobbers */);
9476 return result;
9479 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9480 vpaddq_s8 (int8x16_t a, int8x16_t b)
9482 int8x16_t result;
9483 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9484 : "=w"(result)
9485 : "w"(a), "w"(b)
9486 : /* No clobbers */);
9487 return result;
9490 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9491 vpaddq_s16 (int16x8_t a, int16x8_t b)
9493 int16x8_t result;
9494 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9495 : "=w"(result)
9496 : "w"(a), "w"(b)
9497 : /* No clobbers */);
9498 return result;
9501 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9502 vpaddq_s32 (int32x4_t a, int32x4_t b)
9504 int32x4_t result;
9505 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9506 : "=w"(result)
9507 : "w"(a), "w"(b)
9508 : /* No clobbers */);
9509 return result;
9512 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9513 vpaddq_s64 (int64x2_t a, int64x2_t b)
9515 int64x2_t result;
9516 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9517 : "=w"(result)
9518 : "w"(a), "w"(b)
9519 : /* No clobbers */);
9520 return result;
9523 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9524 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
9526 uint8x16_t result;
9527 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9528 : "=w"(result)
9529 : "w"(a), "w"(b)
9530 : /* No clobbers */);
9531 return result;
9534 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9535 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
9537 uint16x8_t result;
9538 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9539 : "=w"(result)
9540 : "w"(a), "w"(b)
9541 : /* No clobbers */);
9542 return result;
9545 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9546 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
9548 uint32x4_t result;
9549 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9550 : "=w"(result)
9551 : "w"(a), "w"(b)
9552 : /* No clobbers */);
9553 return result;
9556 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9557 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9559 uint64x2_t result;
9560 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9561 : "=w"(result)
9562 : "w"(a), "w"(b)
9563 : /* No clobbers */);
9564 return result;
/* vpadds_f32: scalar pairwise add — sums the two lanes of A into a
   single float (FADDP scalar form).  */
9567 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9568 vpadds_f32 (float32x2_t a)
9570 float32_t result;
9571 __asm__ ("faddp %s0,%1.2s"
9572 : "=w"(result)
9573 : "w"(a)
9574 : /* No clobbers */);
9575 return result;
/* vpmax_*: pairwise maximum (FMAXP/SMAXP/UMAXP) of adjacent lanes of
   the concatenation of A and B.  */
9578 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9579 vpmax_f32 (float32x2_t a, float32x2_t b)
9581 float32x2_t result;
9582 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
9583 : "=w"(result)
9584 : "w"(a), "w"(b)
9585 : /* No clobbers */);
9586 return result;
9589 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9590 vpmax_s8 (int8x8_t a, int8x8_t b)
9592 int8x8_t result;
9593 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
9594 : "=w"(result)
9595 : "w"(a), "w"(b)
9596 : /* No clobbers */);
9597 return result;
9600 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9601 vpmax_s16 (int16x4_t a, int16x4_t b)
9603 int16x4_t result;
9604 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
9605 : "=w"(result)
9606 : "w"(a), "w"(b)
9607 : /* No clobbers */);
9608 return result;
9611 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9612 vpmax_s32 (int32x2_t a, int32x2_t b)
9614 int32x2_t result;
9615 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
9616 : "=w"(result)
9617 : "w"(a), "w"(b)
9618 : /* No clobbers */);
9619 return result;
9622 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9623 vpmax_u8 (uint8x8_t a, uint8x8_t b)
9625 uint8x8_t result;
9626 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
9627 : "=w"(result)
9628 : "w"(a), "w"(b)
9629 : /* No clobbers */);
9630 return result;
9633 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9634 vpmax_u16 (uint16x4_t a, uint16x4_t b)
9636 uint16x4_t result;
9637 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
9638 : "=w"(result)
9639 : "w"(a), "w"(b)
9640 : /* No clobbers */);
9641 return result;
9644 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9645 vpmax_u32 (uint32x2_t a, uint32x2_t b)
9647 uint32x2_t result;
9648 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
9649 : "=w"(result)
9650 : "w"(a), "w"(b)
9651 : /* No clobbers */);
9652 return result;
/* vpmaxnm* family: pairwise maxNum (FMAXNMP) — IEEE 754-2008 maxNum
   semantics, i.e. a quiet NaN operand loses to a number.  The *qd/*s
   forms reduce a whole vector to one scalar.  */
9655 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9656 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
9658 float32x2_t result;
9659 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
9660 : "=w"(result)
9661 : "w"(a), "w"(b)
9662 : /* No clobbers */);
9663 return result;
9666 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9667 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
9669 float32x4_t result;
9670 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
9671 : "=w"(result)
9672 : "w"(a), "w"(b)
9673 : /* No clobbers */);
9674 return result;
9677 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9678 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
9680 float64x2_t result;
9681 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
9682 : "=w"(result)
9683 : "w"(a), "w"(b)
9684 : /* No clobbers */);
9685 return result;
9688 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9689 vpmaxnmqd_f64 (float64x2_t a)
9691 float64_t result;
9692 __asm__ ("fmaxnmp %d0,%1.2d"
9693 : "=w"(result)
9694 : "w"(a)
9695 : /* No clobbers */);
9696 return result;
9699 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9700 vpmaxnms_f32 (float32x2_t a)
9702 float32_t result;
9703 __asm__ ("fmaxnmp %s0,%1.2s"
9704 : "=w"(result)
9705 : "w"(a)
9706 : /* No clobbers */);
9707 return result;
/* vpmaxq_* (128-bit pairwise maximum) and the scalar reductions
   vpmaxqd_f64 / vpmaxs_f32 (FMAXP scalar form reduces the two lanes
   of A to one value).  */
9710 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9711 vpmaxq_f32 (float32x4_t a, float32x4_t b)
9713 float32x4_t result;
9714 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
9715 : "=w"(result)
9716 : "w"(a), "w"(b)
9717 : /* No clobbers */);
9718 return result;
9721 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9722 vpmaxq_f64 (float64x2_t a, float64x2_t b)
9724 float64x2_t result;
9725 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
9726 : "=w"(result)
9727 : "w"(a), "w"(b)
9728 : /* No clobbers */);
9729 return result;
9732 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9733 vpmaxq_s8 (int8x16_t a, int8x16_t b)
9735 int8x16_t result;
9736 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
9737 : "=w"(result)
9738 : "w"(a), "w"(b)
9739 : /* No clobbers */);
9740 return result;
9743 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9744 vpmaxq_s16 (int16x8_t a, int16x8_t b)
9746 int16x8_t result;
9747 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
9748 : "=w"(result)
9749 : "w"(a), "w"(b)
9750 : /* No clobbers */);
9751 return result;
9754 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9755 vpmaxq_s32 (int32x4_t a, int32x4_t b)
9757 int32x4_t result;
9758 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
9759 : "=w"(result)
9760 : "w"(a), "w"(b)
9761 : /* No clobbers */);
9762 return result;
9765 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9766 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
9768 uint8x16_t result;
9769 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
9770 : "=w"(result)
9771 : "w"(a), "w"(b)
9772 : /* No clobbers */);
9773 return result;
9776 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9777 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
9779 uint16x8_t result;
9780 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
9781 : "=w"(result)
9782 : "w"(a), "w"(b)
9783 : /* No clobbers */);
9784 return result;
9787 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9788 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
9790 uint32x4_t result;
9791 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
9792 : "=w"(result)
9793 : "w"(a), "w"(b)
9794 : /* No clobbers */);
9795 return result;
9798 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9799 vpmaxqd_f64 (float64x2_t a)
9801 float64_t result;
9802 __asm__ ("fmaxp %d0,%1.2d"
9803 : "=w"(result)
9804 : "w"(a)
9805 : /* No clobbers */);
9806 return result;
9809 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9810 vpmaxs_f32 (float32x2_t a)
9812 float32_t result;
9813 __asm__ ("fmaxp %s0,%1.2s"
9814 : "=w"(result)
9815 : "w"(a)
9816 : /* No clobbers */);
9817 return result;
/* 64-bit pairwise-minimum intrinsics.  FMINP/SMINP/UMINP take the
   minimum of each adjacent pair of lanes drawn from the concatenation
   of the two 64-bit source vectors.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Pairwise "minNum" intrinsics.  FMINNMP is the IEEE-754 minNum
   pairwise form (differs from FMINP in its NaN handling, per the
   instruction's definition).  vpminnmqd_f64/vpminnms_f32 use the
   scalar-reducing form over the two lanes of a single vector.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction over the two doubleword lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction over the two single-precision lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* 128-bit pairwise-minimum intrinsics (FMINP/SMINP/UMINP), plus the
   scalar-reducing forms vpminqd_f64/vpmins_f32, which return the
   minimum of the two lanes of a single vector.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpminq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpminq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpminq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpminq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: minimum of the two doubleword lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: minimum of the two single-precision lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Saturating doubling-multiply-high by a scalar, broadcast from lane 0
   (SQDMULH Vd, Vn, Vm.<T>[0]).  Note the constraints: the 16-bit
   variants use "x" for the scalar operand because an indexed .h lane
   may only name registers V0-V15; the 32-bit variants use plain "w".  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Saturating-narrow "high" intrinsics.  The result is seeded with A in
   the low half (via vcombine with a zero upper half), then
   SQXTN2/UQXTN2/SQXTUN2 saturate-narrows B into the high half.  The
   "+w" read-write constraint keeps A's half live across the asm.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* Signed-to-unsigned saturating narrow into the high half (SQXTUN2).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* Saturating rounding doubling-multiply-high by a scalar broadcast
   from lane 0 (SQRDMULH Vd, Vn, Vm.<T>[0]).  As with vqdmulh_n, the
   16-bit variants need the "x" constraint (indexed .h lanes are
   limited to V0-V15); the 32-bit variants use "w".  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Saturating rounding shift-right-narrow "high" forms
   (SQRSHRN2/UQRSHRN2).  These must be macros: the shift count C is
   encoded as an immediate ("i" constraint), so it must be a
   compile-time constant.  The low half of the result is seeded from A
   and the narrowed B is inserted into the high half via "+w".  */
#define vqrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Signed-to-unsigned saturating rounding shift-right-narrow "high"
   forms (SQRSHRUN2).  Macros because the shift count C is an
   immediate ("i" constraint) and must be a compile-time constant.  */
#define vqrshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Saturating shift-right-narrow "high" forms (SQSHRN2/UQSHRN2).
   Macros because the shift count C is an immediate ("i" constraint)
   and must be a compile-time constant.  */
#define vqshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Signed-to-unsigned saturating shift-right-narrow "high" forms
   (SQSHRUN2).  Macros because the shift count C is an immediate
   ("i" constraint) and must be a compile-time constant.  */
#define vqshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned reciprocal estimate (URECPE), per-lane.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("urecpe %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("urecpe %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Rounding shift-right-narrow "high" forms (RSHRN2).  Macros because
   the shift count C is an immediate ("i" constraint) and must be a
   compile-time constant.  Low half seeded from A, narrowed B inserted
   into the high half via the "+w" read-write output.  */
#define vrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Rounding shift-right-narrow (RSHRN).  Macros because the shift
   count B is an immediate ("i" constraint) and must be a compile-time
   constant.  */
#define vrshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Reciprocal square-root estimate (FRSQRTE/URSQRTE) and reciprocal
   square-root step (FRSQRTS) intrinsics.  Vector forms operate
   per-lane; the "d"/"s" suffixed forms are scalar, using the %d/%s
   register operand modifiers.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrte_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frsqrte %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* float64x1_t holds a single lane, so the scalar FRSQRTE form is used.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrsqrte_f64 (float64x1_t a)
{
  float64x1_t result;
  __asm__ ("frsqrte %d0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsqrte_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("ursqrte %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrted_f64 (float64_t a)
{
  float64_t result;
  __asm__ ("frsqrte %d0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrteq_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frsqrte %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrteq_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frsqrte %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsqrteq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("ursqrte %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtes_f32 (float32_t a)
{
  float32_t result;
  __asm__ ("frsqrte %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* FRSQRTS: one Newton-Raphson style refinement step for 1/sqrt(x).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrts_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrtsd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("frsqrts %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtss_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("frsqrts %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Rounding subtract-high-half-narrow "high" forms (RSUBHN2).  The
   result's low half is seeded from A; the rounded narrowed high half
   of (B - C) is inserted into the high half via "+w".  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Rounding subtract-high-half-narrow (RSUBHN): narrow each lane of
   (A - B) to its rounded high half.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsubhn_s16 (int16x8_t a, int16x8_t b)
{
  int8x8_t result;
  __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsubhn_s32 (int32x4_t a, int32x4_t b)
{
  int16x4_t result;
  __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsubhn_s64 (int64x2_t a, int64x2_t b)
{
  int32x2_t result;
  __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
{
  uint8x8_t result;
  __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
{
  uint16x4_t result;
  __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
{
  uint32x2_t result;
  __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Shift-right-narrow "high" forms (SHRN2).  Macros because the shift
   count C is an immediate ("i" constraint) and must be a compile-time
   constant.  */
#define vshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vshrn_n_*: shift each lane of A right by the immediate B and narrow
   to half width ("shrn").  Macros so B can use the "i" constraint.  */

#define vshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vsli(q)_n_p*: shift left and insert ("sli") for polynomial vectors.
   Inserts the shifted lanes of B into A at immediate position C; the
   "0" constraint ties A to the output register since SLI reads and
   writes its destination.  */

#define vsli_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sli %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsli_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sli %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sli %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sli %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vsri(q)_n_p*: shift right and insert ("sri") for polynomial vectors.
   Mirror of the vsli family above; destination is tied via "0"(a_).  */

#define vsri_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sri %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsri_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sri %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sri %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sri %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vst1_lane_*: store lane C of the 64-bit vector B to the address A
   ("st1 {..}[lane]").  No asm outputs; the store is expressed through
   the "memory" clobber.  C must be a constant lane index ("i").  */

#define vst1_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* vst1q_lane_*: store lane C of the 128-bit vector B to the address A.
   Same shape as the vst1_lane_* macros above, for q-register types.  */

#define vst1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
11509 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11510 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11512 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11513 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
11514 : "+w"(result)
11515 : "w"(b), "w"(c)
11516 : /* No clobbers */);
11517 return result;
11520 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11521 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11523 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11524 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
11525 : "+w"(result)
11526 : "w"(b), "w"(c)
11527 : /* No clobbers */);
11528 return result;
11531 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11532 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11534 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11535 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
11536 : "+w"(result)
11537 : "w"(b), "w"(c)
11538 : /* No clobbers */);
11539 return result;
11542 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11543 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11545 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11546 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
11547 : "+w"(result)
11548 : "w"(b), "w"(c)
11549 : /* No clobbers */);
11550 return result;
11553 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11554 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11556 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11557 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
11558 : "+w"(result)
11559 : "w"(b), "w"(c)
11560 : /* No clobbers */);
11561 return result;
11564 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11565 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11567 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11568 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
11569 : "+w"(result)
11570 : "w"(b), "w"(c)
11571 : /* No clobbers */);
11572 return result;
11575 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11576 vsubhn_s16 (int16x8_t a, int16x8_t b)
11578 int8x8_t result;
11579 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
11580 : "=w"(result)
11581 : "w"(a), "w"(b)
11582 : /* No clobbers */);
11583 return result;
11586 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11587 vsubhn_s32 (int32x4_t a, int32x4_t b)
11589 int16x4_t result;
11590 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
11591 : "=w"(result)
11592 : "w"(a), "w"(b)
11593 : /* No clobbers */);
11594 return result;
11597 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11598 vsubhn_s64 (int64x2_t a, int64x2_t b)
11600 int32x2_t result;
11601 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
11602 : "=w"(result)
11603 : "w"(a), "w"(b)
11604 : /* No clobbers */);
11605 return result;
11608 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11609 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
11611 uint8x8_t result;
11612 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
11613 : "=w"(result)
11614 : "w"(a), "w"(b)
11615 : /* No clobbers */);
11616 return result;
11619 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11620 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
11622 uint16x4_t result;
11623 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
11624 : "=w"(result)
11625 : "w"(a), "w"(b)
11626 : /* No clobbers */);
11627 return result;
11630 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11631 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
11633 uint32x2_t result;
11634 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
11635 : "=w"(result)
11636 : "w"(a), "w"(b)
11637 : /* No clobbers */);
11638 return result;
11641 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11642 vtst_p8 (poly8x8_t a, poly8x8_t b)
11644 uint8x8_t result;
11645 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
11646 : "=w"(result)
11647 : "w"(a), "w"(b)
11648 : /* No clobbers */);
11649 return result;
11652 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11653 vtst_p16 (poly16x4_t a, poly16x4_t b)
11655 uint16x4_t result;
11656 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
11657 : "=w"(result)
11658 : "w"(a), "w"(b)
11659 : /* No clobbers */);
11660 return result;
11663 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11664 vtstq_p8 (poly8x16_t a, poly8x16_t b)
11666 uint8x16_t result;
11667 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
11668 : "=w"(result)
11669 : "w"(a), "w"(b)
11670 : /* No clobbers */);
11671 return result;
11674 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11675 vtstq_p16 (poly16x8_t a, poly16x8_t b)
11677 uint16x8_t result;
11678 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
11679 : "=w"(result)
11680 : "w"(a), "w"(b)
11681 : /* No clobbers */);
11682 return result;
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   This is required for casting memory reference.  */
11734 #define __STRUCTN(t, sz, nelem) \
11735 typedef struct t ## sz ## x ## nelem ## _t { \
11736 t ## sz ## _t val[nelem]; \
11737 } t ## sz ## x ## nelem ## _t;
11739 /* 2-element structs. */
11740 __STRUCTN (int, 8, 2)
11741 __STRUCTN (int, 16, 2)
11742 __STRUCTN (uint, 8, 2)
11743 __STRUCTN (uint, 16, 2)
11744 __STRUCTN (poly, 8, 2)
11745 __STRUCTN (poly, 16, 2)
11746 /* 3-element structs. */
11747 __STRUCTN (int, 8, 3)
11748 __STRUCTN (int, 16, 3)
11749 __STRUCTN (int, 32, 3)
11750 __STRUCTN (int, 64, 3)
11751 __STRUCTN (uint, 8, 3)
11752 __STRUCTN (uint, 16, 3)
11753 __STRUCTN (uint, 32, 3)
11754 __STRUCTN (uint, 64, 3)
11755 __STRUCTN (float, 32, 3)
11756 __STRUCTN (float, 64, 3)
11757 __STRUCTN (poly, 8, 3)
11758 __STRUCTN (poly, 16, 3)
11759 /* 4-element structs. */
11760 __STRUCTN (int, 8, 4)
11761 __STRUCTN (int, 64, 4)
11762 __STRUCTN (uint, 8, 4)
11763 __STRUCTN (uint, 64, 4)
11764 __STRUCTN (poly, 8, 4)
11765 __STRUCTN (float, 64, 4)
11766 #undef __STRUCTN
11769 #define __ST2_LANE_FUNC(intype, largetype, ptrtype, \
11770 mode, ptr_mode, funcsuffix, signedtype) \
11771 __extension__ static __inline void \
11772 __attribute__ ((__always_inline__)) \
11773 vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
11774 intype __b, const int __c) \
11776 __builtin_aarch64_simd_oi __o; \
11777 largetype __temp; \
11778 __temp.val[0] \
11779 = vcombine_##funcsuffix (__b.val[0], \
11780 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11781 __temp.val[1] \
11782 = vcombine_##funcsuffix (__b.val[1], \
11783 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11784 __o = __builtin_aarch64_set_qregoi##mode (__o, \
11785 (signedtype) __temp.val[0], 0); \
11786 __o = __builtin_aarch64_set_qregoi##mode (__o, \
11787 (signedtype) __temp.val[1], 1); \
11788 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
11789 __ptr, __o, __c); \
11792 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
11793 float32x4_t)
11794 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
11795 float64x2_t)
11796 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
11797 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
11798 int16x8_t)
11799 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
11800 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
11801 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
11802 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
11803 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
11804 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
11805 int16x8_t)
11806 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
11807 int32x4_t)
11808 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
11809 int64x2_t)
11811 #undef __ST2_LANE_FUNC
11812 #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
11813 __extension__ static __inline void \
11814 __attribute__ ((__always_inline__)) \
11815 vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
11816 intype __b, const int __c) \
11818 union { intype __i; \
11819 __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
11820 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
11821 __ptr, __temp.__o, __c); \
11824 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
11825 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
11826 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
11827 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
11828 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
11829 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
11830 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
11831 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
11832 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
11833 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
11834 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
11835 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
11837 #define __ST3_LANE_FUNC(intype, largetype, ptrtype, \
11838 mode, ptr_mode, funcsuffix, signedtype) \
11839 __extension__ static __inline void \
11840 __attribute__ ((__always_inline__)) \
11841 vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
11842 intype __b, const int __c) \
11844 __builtin_aarch64_simd_ci __o; \
11845 largetype __temp; \
11846 __temp.val[0] \
11847 = vcombine_##funcsuffix (__b.val[0], \
11848 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11849 __temp.val[1] \
11850 = vcombine_##funcsuffix (__b.val[1], \
11851 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11852 __temp.val[2] \
11853 = vcombine_##funcsuffix (__b.val[2], \
11854 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11855 __o = __builtin_aarch64_set_qregci##mode (__o, \
11856 (signedtype) __temp.val[0], 0); \
11857 __o = __builtin_aarch64_set_qregci##mode (__o, \
11858 (signedtype) __temp.val[1], 1); \
11859 __o = __builtin_aarch64_set_qregci##mode (__o, \
11860 (signedtype) __temp.val[2], 2); \
11861 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
11862 __ptr, __o, __c); \
11865 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
11866 float32x4_t)
11867 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
11868 float64x2_t)
11869 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
11870 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
11871 int16x8_t)
11872 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
11873 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
11874 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
11875 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
11876 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
11877 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
11878 int16x8_t)
11879 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
11880 int32x4_t)
11881 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
11882 int64x2_t)
11884 #undef __ST3_LANE_FUNC
11885 #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
11886 __extension__ static __inline void \
11887 __attribute__ ((__always_inline__)) \
11888 vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
11889 intype __b, const int __c) \
11891 union { intype __i; \
11892 __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
11893 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
11894 __ptr, __temp.__o, __c); \
11897 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
11898 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
11899 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
11900 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
11901 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
11902 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
11903 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
11904 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
11905 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
11906 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
11907 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
11908 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
11910 #define __ST4_LANE_FUNC(intype, largetype, ptrtype, \
11911 mode, ptr_mode, funcsuffix, signedtype) \
11912 __extension__ static __inline void \
11913 __attribute__ ((__always_inline__)) \
11914 vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
11915 intype __b, const int __c) \
11917 __builtin_aarch64_simd_xi __o; \
11918 largetype __temp; \
11919 __temp.val[0] \
11920 = vcombine_##funcsuffix (__b.val[0], \
11921 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11922 __temp.val[1] \
11923 = vcombine_##funcsuffix (__b.val[1], \
11924 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11925 __temp.val[2] \
11926 = vcombine_##funcsuffix (__b.val[2], \
11927 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11928 __temp.val[3] \
11929 = vcombine_##funcsuffix (__b.val[3], \
11930 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
11931 __o = __builtin_aarch64_set_qregxi##mode (__o, \
11932 (signedtype) __temp.val[0], 0); \
11933 __o = __builtin_aarch64_set_qregxi##mode (__o, \
11934 (signedtype) __temp.val[1], 1); \
11935 __o = __builtin_aarch64_set_qregxi##mode (__o, \
11936 (signedtype) __temp.val[2], 2); \
11937 __o = __builtin_aarch64_set_qregxi##mode (__o, \
11938 (signedtype) __temp.val[3], 3); \
11939 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
11940 __ptr, __o, __c); \
11943 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
11944 float32x4_t)
11945 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
11946 float64x2_t)
11947 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
11948 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
11949 int16x8_t)
11950 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
11951 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
11952 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
11953 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
11954 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
11955 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
11956 int16x8_t)
11957 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
11958 int32x4_t)
11959 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
11960 int64x2_t)
11962 #undef __ST4_LANE_FUNC
11963 #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
11964 __extension__ static __inline void \
11965 __attribute__ ((__always_inline__)) \
11966 vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
11967 intype __b, const int __c) \
11969 union { intype __i; \
11970 __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
11971 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
11972 __ptr, __temp.__o, __c); \
11975 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
11976 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
11977 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
11978 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
11979 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
11980 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
11981 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
11982 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
11983 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
11984 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
11985 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
11986 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
11988 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
11989 vaddlv_s32 (int32x2_t a)
11991 int64_t result;
11992 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
11993 return result;
11996 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11997 vaddlv_u32 (uint32x2_t a)
11999 uint64_t result;
12000 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
12001 return result;
12004 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12005 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12007 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
12010 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12011 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12013 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
12016 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12017 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12019 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
12022 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12023 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12025 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
12028 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12029 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12031 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
12034 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12035 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12037 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
12040 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12041 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12043 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
12046 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12047 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12049 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
12052 /* Table intrinsics. */
12054 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12055 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
12057 poly8x8_t result;
12058 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12059 : "=w"(result)
12060 : "w"(a), "w"(b)
12061 : /* No clobbers */);
12062 return result;
12065 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12066 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
12068 int8x8_t result;
12069 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12070 : "=w"(result)
12071 : "w"(a), "w"(b)
12072 : /* No clobbers */);
12073 return result;
12076 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12077 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
12079 uint8x8_t result;
12080 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12081 : "=w"(result)
12082 : "w"(a), "w"(b)
12083 : /* No clobbers */);
12084 return result;
12087 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12088 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
12090 poly8x16_t result;
12091 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12092 : "=w"(result)
12093 : "w"(a), "w"(b)
12094 : /* No clobbers */);
12095 return result;
12098 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12099 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
12101 int8x16_t result;
12102 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12103 : "=w"(result)
12104 : "w"(a), "w"(b)
12105 : /* No clobbers */);
12106 return result;
12109 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12110 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
12112 uint8x16_t result;
12113 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12114 : "=w"(result)
12115 : "w"(a), "w"(b)
12116 : /* No clobbers */);
12117 return result;
/* vqtbl2/vqtbl3/vqtbl4 families: table lookup into 2, 3 or 4
   concatenated 128-bit tables.  TBL requires the table in consecutive
   vector registers, which GCC's allocator cannot guarantee for a
   struct of separate values, so the struct is passed through memory
   ("Q" operand) and reloaded with ld1 into the fixed registers
   v16..v19 — hence the explicit "memory" and vN clobbers.  */
12120 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12121 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
12123 int8x8_t result;
12124 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12125 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12126 :"=w"(result)
12127 :"Q"(tab),"w"(idx)
12128 :"memory", "v16", "v17");
12129 return result;
12132 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12133 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
12135 uint8x8_t result;
12136 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12137 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12138 :"=w"(result)
12139 :"Q"(tab),"w"(idx)
12140 :"memory", "v16", "v17");
12141 return result;
12144 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12145 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
12147 poly8x8_t result;
12148 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12149 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12150 :"=w"(result)
12151 :"Q"(tab),"w"(idx)
12152 :"memory", "v16", "v17");
12153 return result;
12156 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12157 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
12159 int8x16_t result;
12160 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12161 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12162 :"=w"(result)
12163 :"Q"(tab),"w"(idx)
12164 :"memory", "v16", "v17");
12165 return result;
12168 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12169 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
12171 uint8x16_t result;
12172 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12173 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12174 :"=w"(result)
12175 :"Q"(tab),"w"(idx)
12176 :"memory", "v16", "v17");
12177 return result;
12180 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12181 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
12183 poly8x16_t result;
12184 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12185 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12186 :"=w"(result)
12187 :"Q"(tab),"w"(idx)
12188 :"memory", "v16", "v17");
12189 return result;
/* Three-table variants: 48-byte table in v16-v18.  */
12192 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12193 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
12195 int8x8_t result;
12196 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12197 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12198 :"=w"(result)
12199 :"Q"(tab),"w"(idx)
12200 :"memory", "v16", "v17", "v18");
12201 return result;
12204 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12205 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
12207 uint8x8_t result;
12208 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12209 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12210 :"=w"(result)
12211 :"Q"(tab),"w"(idx)
12212 :"memory", "v16", "v17", "v18");
12213 return result;
12216 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12217 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
12219 poly8x8_t result;
12220 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12221 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12222 :"=w"(result)
12223 :"Q"(tab),"w"(idx)
12224 :"memory", "v16", "v17", "v18");
12225 return result;
12228 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12229 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
12231 int8x16_t result;
12232 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12233 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12234 :"=w"(result)
12235 :"Q"(tab),"w"(idx)
12236 :"memory", "v16", "v17", "v18");
12237 return result;
12240 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12241 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
12243 uint8x16_t result;
12244 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12245 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12246 :"=w"(result)
12247 :"Q"(tab),"w"(idx)
12248 :"memory", "v16", "v17", "v18");
12249 return result;
12252 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12253 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
12255 poly8x16_t result;
12256 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12257 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12258 :"=w"(result)
12259 :"Q"(tab),"w"(idx)
12260 :"memory", "v16", "v17", "v18");
12261 return result;
/* Four-table variants: 64-byte table in v16-v19.  */
12264 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12265 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
12267 int8x8_t result;
12268 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12269 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12270 :"=w"(result)
12271 :"Q"(tab),"w"(idx)
12272 :"memory", "v16", "v17", "v18", "v19");
12273 return result;
12276 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12277 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
12279 uint8x8_t result;
12280 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12281 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12282 :"=w"(result)
12283 :"Q"(tab),"w"(idx)
12284 :"memory", "v16", "v17", "v18", "v19");
12285 return result;
12288 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12289 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
12291 poly8x8_t result;
12292 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12293 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12294 :"=w"(result)
12295 :"Q"(tab),"w"(idx)
12296 :"memory", "v16", "v17", "v18", "v19");
12297 return result;
12301 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12302 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
12304 int8x16_t result;
12305 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12306 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12307 :"=w"(result)
12308 :"Q"(tab),"w"(idx)
12309 :"memory", "v16", "v17", "v18", "v19");
12310 return result;
12313 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12314 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
12316 uint8x16_t result;
12317 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12318 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12319 :"=w"(result)
12320 :"Q"(tab),"w"(idx)
12321 :"memory", "v16", "v17", "v18", "v19");
12322 return result;
12325 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12326 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
12328 poly8x16_t result;
12329 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12330 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12331 :"=w"(result)
12332 :"Q"(tab),"w"(idx)
12333 :"memory", "v16", "v17", "v18", "v19");
12334 return result;
/* vqtbx1 family: table lookup with fallback.  Per AArch64 TBX
   semantics, lanes whose index is out of range keep their previous
   value, so result is seeded from r and bound as a read-write "+w"
   operand.  Single-register table, so no clobbers are needed.  */
12338 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12339 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
12341 int8x8_t result = r;
12342 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12343 : "+w"(result)
12344 : "w"(tab), "w"(idx)
12345 : /* No clobbers */);
12346 return result;
12349 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12350 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
12352 uint8x8_t result = r;
12353 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12354 : "+w"(result)
12355 : "w"(tab), "w"(idx)
12356 : /* No clobbers */);
12357 return result;
12360 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12361 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
12363 poly8x8_t result = r;
12364 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12365 : "+w"(result)
12366 : "w"(tab), "w"(idx)
12367 : /* No clobbers */);
12368 return result;
12371 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12372 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
12374 int8x16_t result = r;
12375 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12376 : "+w"(result)
12377 : "w"(tab), "w"(idx)
12378 : /* No clobbers */);
12379 return result;
12382 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12383 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
12385 uint8x16_t result = r;
12386 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12387 : "+w"(result)
12388 : "w"(tab), "w"(idx)
12389 : /* No clobbers */);
12390 return result;
12393 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12394 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
12396 poly8x16_t result = r;
12397 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12398 : "+w"(result)
12399 : "w"(tab), "w"(idx)
12400 : /* No clobbers */);
12401 return result;
/* vqtbx2/vqtbx3/vqtbx4 families: multi-table TBX.  Combines the two
   techniques above: the table struct is reloaded from memory into the
   fixed consecutive registers v16..v19 (TBX needs a consecutive
   register list), and result is a read-write "+w" operand seeded from
   r so that out-of-range lanes are left unchanged.  */
12404 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12405 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
12407 int8x8_t result = r;
12408 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12409 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12410 :"+w"(result)
12411 :"Q"(tab),"w"(idx)
12412 :"memory", "v16", "v17");
12413 return result;
12416 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12417 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
12419 uint8x8_t result = r;
12420 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12421 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12422 :"+w"(result)
12423 :"Q"(tab),"w"(idx)
12424 :"memory", "v16", "v17");
12425 return result;
12428 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12429 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
12431 poly8x8_t result = r;
12432 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12433 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12434 :"+w"(result)
12435 :"Q"(tab),"w"(idx)
12436 :"memory", "v16", "v17");
12437 return result;
12441 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12442 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
12444 int8x16_t result = r;
12445 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12446 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12447 :"+w"(result)
12448 :"Q"(tab),"w"(idx)
12449 :"memory", "v16", "v17");
12450 return result;
12453 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12454 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
12456 uint8x16_t result = r;
12457 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12458 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12459 :"+w"(result)
12460 :"Q"(tab),"w"(idx)
12461 :"memory", "v16", "v17");
12462 return result;
12465 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12466 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
12468 poly8x16_t result = r;
12469 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12470 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12471 :"+w"(result)
12472 :"Q"(tab),"w"(idx)
12473 :"memory", "v16", "v17");
12474 return result;
/* Three-table TBX variants: table in v16-v18.  */
12478 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12479 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
12481 int8x8_t result = r;
12482 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12483 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12484 :"+w"(result)
12485 :"Q"(tab),"w"(idx)
12486 :"memory", "v16", "v17", "v18");
12487 return result;
12490 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12491 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
12493 uint8x8_t result = r;
12494 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12495 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12496 :"+w"(result)
12497 :"Q"(tab),"w"(idx)
12498 :"memory", "v16", "v17", "v18");
12499 return result;
12502 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12503 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
12505 poly8x8_t result = r;
12506 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12507 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12508 :"+w"(result)
12509 :"Q"(tab),"w"(idx)
12510 :"memory", "v16", "v17", "v18");
12511 return result;
12515 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12516 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
12518 int8x16_t result = r;
12519 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12520 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12521 :"+w"(result)
12522 :"Q"(tab),"w"(idx)
12523 :"memory", "v16", "v17", "v18");
12524 return result;
12527 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12528 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
12530 uint8x16_t result = r;
12531 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12532 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12533 :"+w"(result)
12534 :"Q"(tab),"w"(idx)
12535 :"memory", "v16", "v17", "v18");
12536 return result;
12539 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12540 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
12542 poly8x16_t result = r;
12543 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12544 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12545 :"+w"(result)
12546 :"Q"(tab),"w"(idx)
12547 :"memory", "v16", "v17", "v18");
12548 return result;
/* Four-table TBX variants: table in v16-v19.  */
12552 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12553 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
12555 int8x8_t result = r;
12556 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12557 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12558 :"+w"(result)
12559 :"Q"(tab),"w"(idx)
12560 :"memory", "v16", "v17", "v18", "v19");
12561 return result;
12564 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12565 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
12567 uint8x8_t result = r;
12568 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12569 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12570 :"+w"(result)
12571 :"Q"(tab),"w"(idx)
12572 :"memory", "v16", "v17", "v18", "v19");
12573 return result;
12576 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12577 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
12579 poly8x8_t result = r;
12580 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12581 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12582 :"+w"(result)
12583 :"Q"(tab),"w"(idx)
12584 :"memory", "v16", "v17", "v18", "v19");
12585 return result;
12589 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12590 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
12592 int8x16_t result = r;
12593 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12594 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12595 :"+w"(result)
12596 :"Q"(tab),"w"(idx)
12597 :"memory", "v16", "v17", "v18", "v19");
12598 return result;
12601 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12602 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
12604 uint8x16_t result = r;
12605 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12606 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12607 :"+w"(result)
12608 :"Q"(tab),"w"(idx)
12609 :"memory", "v16", "v17", "v18", "v19");
12610 return result;
12613 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12614 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
12616 poly8x16_t result = r;
12617 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12618 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12619 :"+w"(result)
12620 :"Q"(tab),"w"(idx)
12621 :"memory", "v16", "v17", "v18", "v19");
12622 return result;
12625 /* V7 legacy table intrinsics. */
/* vtbl1/vtbl2: ARMv7-compatible lookups into 64-bit table(s).
   vtbl1 widens the single d-register table to a q-register with a
   zero upper half (vcreate(0)), so indices >= 8 read the zero half —
   matching the v7 contract of returning 0 for out-of-range indices.
   vtbl2 packs both d-register halves into one q-register; either way
   a single one-register TBL suffices and no clobbers are needed.  */
12627 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12628 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
12630 int8x8_t result;
12631 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12632 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12633 : "=w"(result)
12634 : "w"(temp), "w"(idx)
12635 : /* No clobbers */);
12636 return result;
12639 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12640 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
12642 uint8x8_t result;
12643 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12644 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12645 : "=w"(result)
12646 : "w"(temp), "w"(idx)
12647 : /* No clobbers */);
12648 return result;
12651 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12652 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
12654 poly8x8_t result;
12655 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
12656 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12657 : "=w"(result)
12658 : "w"(temp), "w"(idx)
12659 : /* No clobbers */);
12660 return result;
12663 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12664 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
12666 int8x8_t result;
12667 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
12668 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12669 : "=w"(result)
12670 : "w"(temp), "w"(idx)
12671 : /* No clobbers */);
12672 return result;
12675 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12676 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
12678 uint8x8_t result;
12679 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
12680 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12681 : "=w"(result)
12682 : "w"(temp), "w"(idx)
12683 : /* No clobbers */);
12684 return result;
12687 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12688 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
12690 poly8x8_t result;
12691 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
12692 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12693 : "=w"(result)
12694 : "w"(temp), "w"(idx)
12695 : /* No clobbers */);
12696 return result;
/* vtbl3/vtbl4: ARMv7-compatible lookups into 3 or 4 64-bit tables.
   The d-register halves are packed pairwise into an ...x16x2_t
   (vtbl3 zero-pads the final half so indices past the real table read
   zero), then that struct is reloaded via memory into the fixed pair
   v16/v17 for a two-register TBL — same "Q"-operand technique as the
   vqtbl2 family above.  */
12699 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12700 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
12702 int8x8_t result;
12703 int8x16x2_t temp;
12704 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
12705 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12706 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12707 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12708 : "=w"(result)
12709 : "Q"(temp), "w"(idx)
12710 : "v16", "v17", "memory");
12711 return result;
12714 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12715 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
12717 uint8x8_t result;
12718 uint8x16x2_t temp;
12719 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
12720 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12721 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12722 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12723 : "=w"(result)
12724 : "Q"(temp), "w"(idx)
12725 : "v16", "v17", "memory");
12726 return result;
12729 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12730 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
12732 poly8x8_t result;
12733 poly8x16x2_t temp;
12734 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
12735 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
12736 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12737 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12738 : "=w"(result)
12739 : "Q"(temp), "w"(idx)
12740 : "v16", "v17", "memory");
12741 return result;
12744 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12745 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
12747 int8x8_t result;
12748 int8x16x2_t temp;
12749 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
12750 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
12751 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12752 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12753 : "=w"(result)
12754 : "Q"(temp), "w"(idx)
12755 : "v16", "v17", "memory");
12756 return result;
12759 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12760 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
12762 uint8x8_t result;
12763 uint8x16x2_t temp;
12764 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
12765 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
12766 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12767 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12768 : "=w"(result)
12769 : "Q"(temp), "w"(idx)
12770 : "v16", "v17", "memory");
12771 return result;
12774 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12775 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
12777 poly8x8_t result;
12778 poly8x16x2_t temp;
12779 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
12780 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
12781 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12782 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12783 : "=w"(result)
12784 : "Q"(temp), "w"(idx)
12785 : "v16", "v17", "memory");
12786 return result;
/* vtbx2/vtbx4: ARMv7-compatible extended lookups.  Out-of-range
   indices must leave the corresponding lane of r untouched, which is
   exactly TBX's behaviour, so result is seeded from r and bound
   "+w".  vtbx2 packs the two 64-bit halves into one q-register;
   vtbx4 packs four halves into a pair reloaded at v16/v17.
   (vtbx1/vtbx3 are not defined in this block.)  */
12789 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12790 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
12792 int8x8_t result = r;
12793 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
12794 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
12795 : "+w"(result)
12796 : "w"(temp), "w"(idx)
12797 : /* No clobbers */);
12798 return result;
12801 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12802 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
12804 uint8x8_t result = r;
12805 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
12806 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
12807 : "+w"(result)
12808 : "w"(temp), "w"(idx)
12809 : /* No clobbers */);
12810 return result;
12813 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12814 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
12816 poly8x8_t result = r;
12817 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
12818 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
12819 : "+w"(result)
12820 : "w"(temp), "w"(idx)
12821 : /* No clobbers */);
12822 return result;
12825 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12826 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
12828 int8x8_t result = r;
12829 int8x16x2_t temp;
12830 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
12831 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
12832 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12833 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12834 : "+w"(result)
12835 : "Q"(temp), "w"(idx)
12836 : "v16", "v17", "memory");
12837 return result;
12840 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12841 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
12843 uint8x8_t result = r;
12844 uint8x16x2_t temp;
12845 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
12846 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
12847 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12848 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12849 : "+w"(result)
12850 : "Q"(temp), "w"(idx)
12851 : "v16", "v17", "memory");
12852 return result;
12855 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12856 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
12858 poly8x8_t result = r;
12859 poly8x16x2_t temp;
12860 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
12861 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
12862 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
12863 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
12864 : "+w"(result)
12865 : "Q"(temp), "w"(idx)
12866 : "v16", "v17", "memory");
12867 return result;
12870 /* End of temporary inline asm. */
12872 /* Start of optimal implementations in approved order. */
12874 /* vabs */
/* vabs: lane-wise absolute value.  Multi-lane variants map directly
   onto the __builtin_aarch64_abs* builtins; the single-lane 64-bit
   forms operate on element 0 and rebuild the vector with a compound
   literal.  */
12876 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12877 vabs_f32 (float32x2_t __a)
12879 return __builtin_aarch64_absv2sf (__a);
12882 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12883 vabs_f64 (float64x1_t __a)
12885 return (float64x1_t) {__builtin_fabs (__a[0])};
12888 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12889 vabs_s8 (int8x8_t __a)
12891 return __builtin_aarch64_absv8qi (__a);
12894 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12895 vabs_s16 (int16x4_t __a)
12897 return __builtin_aarch64_absv4hi (__a);
12900 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12901 vabs_s32 (int32x2_t __a)
12903 return __builtin_aarch64_absv2si (__a);
12906 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12907 vabs_s64 (int64x1_t __a)
12909 return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
12912 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12913 vabsq_f32 (float32x4_t __a)
12915 return __builtin_aarch64_absv4sf (__a);
12918 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12919 vabsq_f64 (float64x2_t __a)
12921 return __builtin_aarch64_absv2df (__a);
12924 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12925 vabsq_s8 (int8x16_t __a)
12927 return __builtin_aarch64_absv16qi (__a);
12930 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12931 vabsq_s16 (int16x8_t __a)
12933 return __builtin_aarch64_absv8hi (__a);
12936 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12937 vabsq_s32 (int32x4_t __a)
12939 return __builtin_aarch64_absv4si (__a);
12942 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12943 vabsq_s64 (int64x2_t __a)
12945 return __builtin_aarch64_absv2di (__a);
12948 /* vadd */
/* vaddd_s64: scalar addition of two signed 64-bit values.
   Provided for ACLE completeness; the compiler emits a plain add.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddd_s64 (int64_t __a, int64_t __b)
{
  int64_t __sum = __a + __b;
  return __sum;
}

/* vaddd_u64: scalar addition of two unsigned 64-bit values
   (wraps modulo 2^64 per unsigned arithmetic).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64_t __a, uint64_t __b)
{
  uint64_t __sum = __a + __b;
  return __sum;
}
12962 /* vaddv */
/* vaddv: across-lanes add reduction.  The reduc_*plus builtins leave
   the sum in lane 0 of a vector of the input type, so each wrapper
   extracts lane 0.  Unsigned forms cast through the signed builtin
   and back, as the builtins are declared on signed vector types.  */
12964 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
12965 vaddv_s8 (int8x8_t __a)
12967 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
12970 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
12971 vaddv_s16 (int16x4_t __a)
12973 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
12976 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
12977 vaddv_s32 (int32x2_t __a)
12979 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
12982 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
12983 vaddv_u8 (uint8x8_t __a)
12985 return vget_lane_u8 ((uint8x8_t)
12986 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
12990 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
12991 vaddv_u16 (uint16x4_t __a)
12993 return vget_lane_u16 ((uint16x4_t)
12994 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
12998 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12999 vaddv_u32 (uint32x2_t __a)
13001 return vget_lane_u32 ((uint32x2_t)
13002 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
13006 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13007 vaddvq_s8 (int8x16_t __a)
13009 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
13013 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13014 vaddvq_s16 (int16x8_t __a)
13016 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
13019 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13020 vaddvq_s32 (int32x4_t __a)
13022 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
13025 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13026 vaddvq_s64 (int64x2_t __a)
13028 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
13031 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13032 vaddvq_u8 (uint8x16_t __a)
13034 return vgetq_lane_u8 ((uint8x16_t)
13035 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
13039 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13040 vaddvq_u16 (uint16x8_t __a)
13042 return vgetq_lane_u16 ((uint16x8_t)
13043 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
13047 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13048 vaddvq_u32 (uint32x4_t __a)
13050 return vgetq_lane_u32 ((uint32x4_t)
13051 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
13055 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13056 vaddvq_u64 (uint64x2_t __a)
13058 return vgetq_lane_u64 ((uint64x2_t)
13059 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
/* Floating-point reductions: sum held in lane 0 of a temporary.  */
13063 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13064 vaddv_f32 (float32x2_t __a)
13066 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
13067 return vget_lane_f32 (__t, 0);
13070 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13071 vaddvq_f32 (float32x4_t __a)
13073 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
13074 return vgetq_lane_f32 (__t, 0);
13077 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13078 vaddvq_f64 (float64x2_t __a)
13080 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
13081 return vgetq_lane_f64 (__t, 0);
13084 /* vbsl */
/* vbsl: bitwise select.  For each bit, the result takes the bit from
   __b where the mask __a has a 1 and from __c where it has a 0 (BSL
   semantics — see ACLE).  Wraps the simd_bsl* builtins; the suffix
   letters encode the signedness of mask/operands/result.  The 64x1
   forms select on the scalar element and rebuild the vector.  */
13086 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13087 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
13089 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
13092 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13093 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
13095 return (float64x1_t)
13096 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
13099 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13100 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
13102 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
13105 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13106 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
13108 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
13111 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13112 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
13114 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
13117 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13118 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
13120 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
13123 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13124 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
13126 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
13129 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13130 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
13132 return (int64x1_t)
13133 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
13136 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13137 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
13139 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
13142 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13143 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
13145 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
13148 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13149 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
13151 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
13154 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13155 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
13157 return (uint64x1_t)
13158 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
/* 128-bit "q" variants.  */
13161 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13162 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
13164 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
13167 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13168 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
13170 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
13173 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13174 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
13176 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
13179 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13180 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
13182 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
13185 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13186 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
13188 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
13191 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13192 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
13194 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
13197 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13198 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
13200 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
13203 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13204 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
13206 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
13209 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13210 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
13212 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
13215 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13216 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
13218 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
13221 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13222 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
13224 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
13227 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13228 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
13230 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vaes - ARMv8 Crypto extension AES primitives.  Only available when the
   target advertises the crypto feature; each maps to a single AES
   instruction via the corresponding builtin.  */

/* AES single-round encryption (AddRoundKey + SubBytes/ShiftRows).  */
static __inline uint8x16_t
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

/* AES single-round decryption.  */
static __inline uint8x16_t
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

/* AES MixColumns.  */
static __inline uint8x16_t
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

/* AES inverse MixColumns.  */
static __inline uint8x16_t
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}
#endif
13263 /* vcage */
13265 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13266 vcage_f64 (float64x1_t __a, float64x1_t __b)
13268 return vabs_f64 (__a) >= vabs_f64 (__b);
13271 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13272 vcages_f32 (float32_t __a, float32_t __b)
13274 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
13277 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13278 vcage_f32 (float32x2_t __a, float32x2_t __b)
13280 return vabs_f32 (__a) >= vabs_f32 (__b);
13283 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13284 vcageq_f32 (float32x4_t __a, float32x4_t __b)
13286 return vabsq_f32 (__a) >= vabsq_f32 (__b);
13289 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13290 vcaged_f64 (float64_t __a, float64_t __b)
13292 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
13295 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13296 vcageq_f64 (float64x2_t __a, float64x2_t __b)
13298 return vabsq_f64 (__a) >= vabsq_f64 (__b);
13301 /* vcagt */
13303 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13304 vcagts_f32 (float32_t __a, float32_t __b)
13306 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
13309 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13310 vcagt_f32 (float32x2_t __a, float32x2_t __b)
13312 return vabs_f32 (__a) > vabs_f32 (__b);
13315 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13316 vcagt_f64 (float64x1_t __a, float64x1_t __b)
13318 return vabs_f64 (__a) > vabs_f64 (__b);
13321 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13322 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
13324 return vabsq_f32 (__a) > vabsq_f32 (__b);
13327 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13328 vcagtd_f64 (float64_t __a, float64_t __b)
13330 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
13333 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13334 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
13336 return vabsq_f64 (__a) > vabsq_f64 (__b);
13339 /* vcale */
13341 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13342 vcale_f32 (float32x2_t __a, float32x2_t __b)
13344 return vabs_f32 (__a) <= vabs_f32 (__b);
13347 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13348 vcale_f64 (float64x1_t __a, float64x1_t __b)
13350 return vabs_f64 (__a) <= vabs_f64 (__b);
13353 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13354 vcaled_f64 (float64_t __a, float64_t __b)
13356 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
13359 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13360 vcales_f32 (float32_t __a, float32_t __b)
13362 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
13365 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13366 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
13368 return vabsq_f32 (__a) <= vabsq_f32 (__b);
13371 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13372 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
13374 return vabsq_f64 (__a) <= vabsq_f64 (__b);
13377 /* vcalt */
13379 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13380 vcalt_f32 (float32x2_t __a, float32x2_t __b)
13382 return vabs_f32 (__a) < vabs_f32 (__b);
13385 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13386 vcalt_f64 (float64x1_t __a, float64x1_t __b)
13388 return vabs_f64 (__a) < vabs_f64 (__b);
13391 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13392 vcaltd_f64 (float64_t __a, float64_t __b)
13394 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
13397 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13398 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
13400 return vabsq_f32 (__a) < vabsq_f32 (__b);
13403 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13404 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
13406 return vabsq_f64 (__a) < vabsq_f64 (__b);
13409 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13410 vcalts_f32 (float32_t __a, float32_t __b)
13412 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
13415 /* vceq - vector. */
13417 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13418 vceq_f32 (float32x2_t __a, float32x2_t __b)
13420 return (uint32x2_t) (__a == __b);
13423 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13424 vceq_f64 (float64x1_t __a, float64x1_t __b)
13426 return (uint64x1_t) (__a == __b);
13429 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13430 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
13432 return (uint8x8_t) (__a == __b);
13435 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13436 vceq_s8 (int8x8_t __a, int8x8_t __b)
13438 return (uint8x8_t) (__a == __b);
13441 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13442 vceq_s16 (int16x4_t __a, int16x4_t __b)
13444 return (uint16x4_t) (__a == __b);
13447 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13448 vceq_s32 (int32x2_t __a, int32x2_t __b)
13450 return (uint32x2_t) (__a == __b);
13453 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13454 vceq_s64 (int64x1_t __a, int64x1_t __b)
13456 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
13459 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13460 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
13462 return (__a == __b);
13465 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13466 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
13468 return (__a == __b);
13471 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13472 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
13474 return (__a == __b);
13477 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13478 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
13480 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
13483 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13484 vceqq_f32 (float32x4_t __a, float32x4_t __b)
13486 return (uint32x4_t) (__a == __b);
13489 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13490 vceqq_f64 (float64x2_t __a, float64x2_t __b)
13492 return (uint64x2_t) (__a == __b);
13495 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13496 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
13498 return (uint8x16_t) (__a == __b);
13501 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13502 vceqq_s8 (int8x16_t __a, int8x16_t __b)
13504 return (uint8x16_t) (__a == __b);
13507 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13508 vceqq_s16 (int16x8_t __a, int16x8_t __b)
13510 return (uint16x8_t) (__a == __b);
13513 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13514 vceqq_s32 (int32x4_t __a, int32x4_t __b)
13516 return (uint32x4_t) (__a == __b);
13519 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13520 vceqq_s64 (int64x2_t __a, int64x2_t __b)
13522 return (uint64x2_t) (__a == __b);
13525 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13526 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
13528 return (__a == __b);
13531 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13532 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
13534 return (__a == __b);
13537 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13538 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
13540 return (__a == __b);
13543 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13544 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
13546 return (__a == __b);
13549 /* vceq - scalar. */
13551 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13552 vceqs_f32 (float32_t __a, float32_t __b)
13554 return __a == __b ? -1 : 0;
13557 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13558 vceqd_s64 (int64_t __a, int64_t __b)
13560 return __a == __b ? -1ll : 0ll;
13563 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13564 vceqd_u64 (uint64_t __a, uint64_t __b)
13566 return __a == __b ? -1ll : 0ll;
13569 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13570 vceqd_f64 (float64_t __a, float64_t __b)
13572 return __a == __b ? -1ll : 0ll;
13575 /* vceqz - vector. */
13577 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13578 vceqz_f32 (float32x2_t __a)
13580 return (uint32x2_t) (__a == 0.0f);
13583 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13584 vceqz_f64 (float64x1_t __a)
13586 return (uint64x1_t) (__a == (float64x1_t) {0.0});
13589 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13590 vceqz_p8 (poly8x8_t __a)
13592 return (uint8x8_t) (__a == 0);
13595 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13596 vceqz_s8 (int8x8_t __a)
13598 return (uint8x8_t) (__a == 0);
13601 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13602 vceqz_s16 (int16x4_t __a)
13604 return (uint16x4_t) (__a == 0);
13607 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13608 vceqz_s32 (int32x2_t __a)
13610 return (uint32x2_t) (__a == 0);
13613 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13614 vceqz_s64 (int64x1_t __a)
13616 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
13619 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13620 vceqz_u8 (uint8x8_t __a)
13622 return (__a == 0);
13625 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13626 vceqz_u16 (uint16x4_t __a)
13628 return (__a == 0);
13631 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13632 vceqz_u32 (uint32x2_t __a)
13634 return (__a == 0);
13637 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13638 vceqz_u64 (uint64x1_t __a)
13640 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
13643 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13644 vceqzq_f32 (float32x4_t __a)
13646 return (uint32x4_t) (__a == 0.0f);
13649 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13650 vceqzq_f64 (float64x2_t __a)
13652 return (uint64x2_t) (__a == 0.0f);
13655 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13656 vceqzq_p8 (poly8x16_t __a)
13658 return (uint8x16_t) (__a == 0);
13661 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13662 vceqzq_s8 (int8x16_t __a)
13664 return (uint8x16_t) (__a == 0);
13667 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13668 vceqzq_s16 (int16x8_t __a)
13670 return (uint16x8_t) (__a == 0);
13673 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13674 vceqzq_s32 (int32x4_t __a)
13676 return (uint32x4_t) (__a == 0);
13679 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13680 vceqzq_s64 (int64x2_t __a)
13682 return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
13685 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13686 vceqzq_u8 (uint8x16_t __a)
13688 return (__a == 0);
13691 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13692 vceqzq_u16 (uint16x8_t __a)
13694 return (__a == 0);
13697 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13698 vceqzq_u32 (uint32x4_t __a)
13700 return (__a == 0);
13703 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13704 vceqzq_u64 (uint64x2_t __a)
13706 return (__a == __AARCH64_UINT64_C (0));
13709 /* vceqz - scalar. */
13711 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13712 vceqzs_f32 (float32_t __a)
13714 return __a == 0.0f ? -1 : 0;
13717 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13718 vceqzd_s64 (int64_t __a)
13720 return __a == 0 ? -1ll : 0ll;
13723 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13724 vceqzd_u64 (uint64_t __a)
13726 return __a == 0 ? -1ll : 0ll;
13729 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13730 vceqzd_f64 (float64_t __a)
13732 return __a == 0.0 ? -1ll : 0ll;
13735 /* vcge - vector. */
13737 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13738 vcge_f32 (float32x2_t __a, float32x2_t __b)
13740 return (uint32x2_t) (__a >= __b);
13743 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13744 vcge_f64 (float64x1_t __a, float64x1_t __b)
13746 return (uint64x1_t) (__a >= __b);
13749 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13750 vcge_s8 (int8x8_t __a, int8x8_t __b)
13752 return (uint8x8_t) (__a >= __b);
13755 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13756 vcge_s16 (int16x4_t __a, int16x4_t __b)
13758 return (uint16x4_t) (__a >= __b);
13761 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13762 vcge_s32 (int32x2_t __a, int32x2_t __b)
13764 return (uint32x2_t) (__a >= __b);
13767 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13768 vcge_s64 (int64x1_t __a, int64x1_t __b)
13770 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
13773 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13774 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
13776 return (__a >= __b);
13779 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13780 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
13782 return (__a >= __b);
13785 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13786 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
13788 return (__a >= __b);
13791 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13792 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
13794 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
13797 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13798 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
13800 return (uint32x4_t) (__a >= __b);
13803 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13804 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
13806 return (uint64x2_t) (__a >= __b);
13809 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13810 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
13812 return (uint8x16_t) (__a >= __b);
13815 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13816 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
13818 return (uint16x8_t) (__a >= __b);
13821 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13822 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
13824 return (uint32x4_t) (__a >= __b);
13827 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13828 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
13830 return (uint64x2_t) (__a >= __b);
13833 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13834 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
13836 return (__a >= __b);
13839 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13840 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
13842 return (__a >= __b);
13845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13846 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
13848 return (__a >= __b);
13851 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13852 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
13854 return (__a >= __b);
13857 /* vcge - scalar. */
13859 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13860 vcges_f32 (float32_t __a, float32_t __b)
13862 return __a >= __b ? -1 : 0;
13865 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13866 vcged_s64 (int64_t __a, int64_t __b)
13868 return __a >= __b ? -1ll : 0ll;
13871 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13872 vcged_u64 (uint64_t __a, uint64_t __b)
13874 return __a >= __b ? -1ll : 0ll;
13877 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13878 vcged_f64 (float64_t __a, float64_t __b)
13880 return __a >= __b ? -1ll : 0ll;
13883 /* vcgez - vector. */
13885 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13886 vcgez_f32 (float32x2_t __a)
13888 return (uint32x2_t) (__a >= 0.0f);
13891 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13892 vcgez_f64 (float64x1_t __a)
13894 return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
13897 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13898 vcgez_s8 (int8x8_t __a)
13900 return (uint8x8_t) (__a >= 0);
13903 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13904 vcgez_s16 (int16x4_t __a)
13906 return (uint16x4_t) (__a >= 0);
13909 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13910 vcgez_s32 (int32x2_t __a)
13912 return (uint32x2_t) (__a >= 0);
13915 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13916 vcgez_s64 (int64x1_t __a)
13918 return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
13921 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13922 vcgezq_f32 (float32x4_t __a)
13924 return (uint32x4_t) (__a >= 0.0f);
13927 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13928 vcgezq_f64 (float64x2_t __a)
13930 return (uint64x2_t) (__a >= 0.0);
13933 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13934 vcgezq_s8 (int8x16_t __a)
13936 return (uint8x16_t) (__a >= 0);
13939 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13940 vcgezq_s16 (int16x8_t __a)
13942 return (uint16x8_t) (__a >= 0);
13945 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13946 vcgezq_s32 (int32x4_t __a)
13948 return (uint32x4_t) (__a >= 0);
13951 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13952 vcgezq_s64 (int64x2_t __a)
13954 return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
13957 /* vcgez - scalar. */
13959 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13960 vcgezs_f32 (float32_t __a)
13962 return __a >= 0.0f ? -1 : 0;
13965 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13966 vcgezd_s64 (int64_t __a)
13968 return __a >= 0 ? -1ll : 0ll;
13971 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13972 vcgezd_f64 (float64_t __a)
13974 return __a >= 0.0 ? -1ll : 0ll;
13977 /* vcgt - vector. */
13979 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13980 vcgt_f32 (float32x2_t __a, float32x2_t __b)
13982 return (uint32x2_t) (__a > __b);
13985 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13986 vcgt_f64 (float64x1_t __a, float64x1_t __b)
13988 return (uint64x1_t) (__a > __b);
13991 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13992 vcgt_s8 (int8x8_t __a, int8x8_t __b)
13994 return (uint8x8_t) (__a > __b);
13997 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13998 vcgt_s16 (int16x4_t __a, int16x4_t __b)
14000 return (uint16x4_t) (__a > __b);
14003 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14004 vcgt_s32 (int32x2_t __a, int32x2_t __b)
14006 return (uint32x2_t) (__a > __b);
14009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14010 vcgt_s64 (int64x1_t __a, int64x1_t __b)
14012 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14015 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14016 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
14018 return (__a > __b);
14021 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14022 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
14024 return (__a > __b);
14027 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14028 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
14030 return (__a > __b);
14033 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14034 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
14036 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14039 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14040 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
14042 return (uint32x4_t) (__a > __b);
14045 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14046 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
14048 return (uint64x2_t) (__a > __b);
14051 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14052 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
14054 return (uint8x16_t) (__a > __b);
14057 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14058 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
14060 return (uint16x8_t) (__a > __b);
14063 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14064 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
14066 return (uint32x4_t) (__a > __b);
14069 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14070 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
14072 return (uint64x2_t) (__a > __b);
14075 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14076 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
14078 return (__a > __b);
14081 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14082 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
14084 return (__a > __b);
14087 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14088 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
14090 return (__a > __b);
14093 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14094 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
14096 return (__a > __b);
14099 /* vcgt - scalar. */
14101 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14102 vcgts_f32 (float32_t __a, float32_t __b)
14104 return __a > __b ? -1 : 0;
14107 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14108 vcgtd_s64 (int64_t __a, int64_t __b)
14110 return __a > __b ? -1ll : 0ll;
14113 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14114 vcgtd_u64 (uint64_t __a, uint64_t __b)
14116 return __a > __b ? -1ll : 0ll;
14119 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14120 vcgtd_f64 (float64_t __a, float64_t __b)
14122 return __a > __b ? -1ll : 0ll;
14125 /* vcgtz - vector. */
14127 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14128 vcgtz_f32 (float32x2_t __a)
14130 return (uint32x2_t) (__a > 0.0f);
14133 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14134 vcgtz_f64 (float64x1_t __a)
14136 return (uint64x1_t) (__a > (float64x1_t) {0.0});
14139 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14140 vcgtz_s8 (int8x8_t __a)
14142 return (uint8x8_t) (__a > 0);
14145 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14146 vcgtz_s16 (int16x4_t __a)
14148 return (uint16x4_t) (__a > 0);
14151 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14152 vcgtz_s32 (int32x2_t __a)
14154 return (uint32x2_t) (__a > 0);
14157 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14158 vcgtz_s64 (int64x1_t __a)
14160 return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
14163 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14164 vcgtzq_f32 (float32x4_t __a)
14166 return (uint32x4_t) (__a > 0.0f);
14169 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14170 vcgtzq_f64 (float64x2_t __a)
14172 return (uint64x2_t) (__a > 0.0);
14175 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14176 vcgtzq_s8 (int8x16_t __a)
14178 return (uint8x16_t) (__a > 0);
14181 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14182 vcgtzq_s16 (int16x8_t __a)
14184 return (uint16x8_t) (__a > 0);
14187 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14188 vcgtzq_s32 (int32x4_t __a)
14190 return (uint32x4_t) (__a > 0);
14193 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14194 vcgtzq_s64 (int64x2_t __a)
14196 return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
14199 /* vcgtz - scalar. */
14201 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14202 vcgtzs_f32 (float32_t __a)
14204 return __a > 0.0f ? -1 : 0;
14207 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14208 vcgtzd_s64 (int64_t __a)
14210 return __a > 0 ? -1ll : 0ll;
14213 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14214 vcgtzd_f64 (float64_t __a)
14216 return __a > 0.0 ? -1ll : 0ll;
14219 /* vcle - vector. */
14221 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14222 vcle_f32 (float32x2_t __a, float32x2_t __b)
14224 return (uint32x2_t) (__a <= __b);
14227 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14228 vcle_f64 (float64x1_t __a, float64x1_t __b)
14230 return (uint64x1_t) (__a <= __b);
14233 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14234 vcle_s8 (int8x8_t __a, int8x8_t __b)
14236 return (uint8x8_t) (__a <= __b);
14239 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14240 vcle_s16 (int16x4_t __a, int16x4_t __b)
14242 return (uint16x4_t) (__a <= __b);
14245 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14246 vcle_s32 (int32x2_t __a, int32x2_t __b)
14248 return (uint32x2_t) (__a <= __b);
14251 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14252 vcle_s64 (int64x1_t __a, int64x1_t __b)
14254 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14257 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14258 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
14260 return (__a <= __b);
14263 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14264 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
14266 return (__a <= __b);
14269 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14270 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
14272 return (__a <= __b);
14275 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14276 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
14278 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14281 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14282 vcleq_f32 (float32x4_t __a, float32x4_t __b)
14284 return (uint32x4_t) (__a <= __b);
14287 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14288 vcleq_f64 (float64x2_t __a, float64x2_t __b)
14290 return (uint64x2_t) (__a <= __b);
14293 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14294 vcleq_s8 (int8x16_t __a, int8x16_t __b)
14296 return (uint8x16_t) (__a <= __b);
14299 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14300 vcleq_s16 (int16x8_t __a, int16x8_t __b)
14302 return (uint16x8_t) (__a <= __b);
14305 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14306 vcleq_s32 (int32x4_t __a, int32x4_t __b)
14308 return (uint32x4_t) (__a <= __b);
14311 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14312 vcleq_s64 (int64x2_t __a, int64x2_t __b)
14314 return (uint64x2_t) (__a <= __b);
14317 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14318 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
14320 return (__a <= __b);
14323 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14324 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
14326 return (__a <= __b);
14329 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14330 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
14332 return (__a <= __b);
14335 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14336 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
14338 return (__a <= __b);
/* vcle - scalar.  Return an all-ones mask (-1) when __a <= __b, else 0.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcles_f32 (float32_t __a, float32_t __b)
{
  return __a <= __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_s64 (int64_t __a, int64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_u64 (uint64_t __a, uint64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_f64 (float64_t __a, float64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}
/* vclez - vector.  Lane-wise test __a <= 0; each result lane is all ones
   when true, all zeros otherwise.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a <= 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_f64 (float64x1_t __a)
{
  /* float64x1_t is a genuine one-element vector, so compare against a
     one-element vector literal rather than a scalar.  */
  return (uint64x1_t) (__a <= (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclez_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a <= 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclez_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a <= 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a <= 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_s64 (int64x1_t __a)
{
  /* Build the single-lane result explicitly from the lane-0 comparison.  */
  return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a <= 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a <= 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a <= 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclezq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a <= 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a <= 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
}
/* vclez - scalar.  Return an all-ones mask (-1) when __a <= 0, else 0.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vclezs_f32 (float32_t __a)
{
  return __a <= 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_s64 (int64_t __a)
{
  return __a <= 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_f64 (float64_t __a)
{
  return __a <= 0.0 ? -1ll : 0ll;
}
/* vclt - vector.  Lane-wise test __a < __b; each result lane is all ones
   when the comparison holds and all zeros otherwise.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a < __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a < __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a < __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
  /* Single-lane form: build the result from the lane-0 comparison.  */
  return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
}

/* Unsigned forms need no cast: the comparison already produces an unsigned
   mask of the right width.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a < __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a < __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a < __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a < __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a < __b);
}
/* vclt - scalar.  Return an all-ones mask (-1) when __a < __b, else 0.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vclts_f32 (float32_t __a, float32_t __b)
{
  return __a < __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_s64 (int64_t __a, int64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_u64 (uint64_t __a, uint64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_f64 (float64_t __a, float64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}
/* vcltz - vector.  Lane-wise test __a < 0; each result lane is all ones
   when true, all zeros otherwise.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a < 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_f64 (float64x1_t __a)
{
  /* Compare against a one-element vector literal to match float64x1_t.  */
  return (uint64x1_t) (__a < (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a < 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcltz_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a < 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a < 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_s64 (int64x1_t __a)
{
  return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a < 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a < 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a < 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltzq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a < 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a < 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
}
/* vcltz - scalar.  Return an all-ones mask (-1) when __a < 0, else 0.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcltzs_f32 (float32_t __a)
{
  return __a < 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_s64 (int64_t __a)
{
  return __a < 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_f64 (float64_t __a)
{
  return __a < 0.0 ? -1ll : 0ll;
}
/* vclz.  Per-lane count of leading zero bits, via the aarch64 CLZ builtins.
   Unsigned forms reuse the signed builtins with casts, as the bit pattern
   of the operation is identical.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clzv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clzv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clzv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t __a)
{
  return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t __a)
{
  return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t __a)
{
  return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clzv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clzv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clzv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t __a)
{
  return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t __a)
{
  return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t __a)
{
  return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
/* vcvt (double -> float).  Narrow each double lane to float.  The "high"
   form writes the narrowed lanes into the upper half of the result, with
   __a supplying the lower half.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t __a)
{
  return __builtin_aarch64_float_truncate_lo_v2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
}

/* vcvt (float -> double).  Widen float lanes to double; the "high" form
   widens the upper half of __a.  */

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t __a)
{

  return __builtin_aarch64_float_extend_lo_v2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a)
{
  return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
}
/* vcvt (<u>int -> float).  Scalar forms are plain C conversions; vector
   forms use the float/floatuns builtins.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_s64 (int64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_u64 (uint64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_s32 (int32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_u32 (uint32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t __a)
{
  return __builtin_aarch64_floatv2siv2sf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t __a)
{
  /* The unsigned builtin takes a signed vector type; the cast only
     reinterprets the bits.  */
  return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t __a)
{
  return __builtin_aarch64_floatv4siv4sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t __a)
{
  return __builtin_aarch64_floatv2div2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
}
/* vcvt (float -> <u>int).  Convert with truncation toward zero.  Scalar
   forms are plain C conversions; vector forms use the lbtrunc builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtd_s64_f64 (float64_t __a)
{
  return (int64_t) __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtd_u64_f64 (float64_t __a)
{
  return (uint64_t) __a;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_s32_f32 (float32_t __a)
{
  return (int32_t) __a;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_u32_f32 (float32_t __a)
{
  return (uint32_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
}
/* vcvta.  Convert float to integer, rounding to nearest with ties away
   from zero (FCVTA*), via the lround builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lrounddfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lroundudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtas_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtas_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lroundv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lroundv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lroundv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
}
/* vcvtm.  Convert float to integer, rounding toward minus infinity
   (FCVTM*).  Signed scalar forms use the generic floor builtins; the rest
   use aarch64 lfloor builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64 (float64_t __a)
{
  return __builtin_llfloor (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfloorudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtms_s32_f32 (float32_t __a)
{
  return __builtin_ifloorf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtms_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfloorusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
}
/* vcvtn.  Convert float to integer, rounding to nearest with ties to even
   (FCVTN*), via the lfrintn builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
}
/* vcvtp.  Convert float to integer, rounding toward plus infinity
   (FCVTP*).  Signed scalar forms use the generic ceil builtins; the rest
   use aarch64 lceil builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64 (float64_t __a)
{
  return __builtin_llceil (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lceiludfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtps_s32_f32 (float32_t __a)
{
  return __builtin_iceilf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtps_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lceilusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceilv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceilv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceilv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
}
/* vdup_n.  Broadcast a scalar into every lane of a 64-bit vector, built
   with vector initializer lists.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
  return (float32x2_t) {__a, __a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_n_f64 (float64_t __a)
{
  return (float64x1_t) {__a};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (poly8_t __a)
{
  return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (poly16_t __a)
{
  return (poly16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int8_t __a)
{
  return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int16_t __a)
{
  return (int16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t __a)
{
  return (int32x2_t) {__a, __a};
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint8_t __a)
{
  return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint16_t __a)
{
  return (uint16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t __a)
{
  return (uint32x2_t) {__a, __a};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}
/* vdupq_n.  Broadcast a scalar into every lane of a 128-bit vector.
   NOTE(review): the p8/p16/s8/s16/u8/u16 forms take widened 32-bit
   parameter types; this matches the historical arm_neon.h interface —
   the value is implicitly narrowed per lane.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}
/* vdup_lane.  Broadcast lane __b of a 64-bit vector into every lane of a
   64-bit result, via the __aarch64_vdup_lane_* helper macros defined
   earlier in this file.  __b must be a constant lane index.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_u64 (__a, __b);
}
/* vdup_laneq.  Broadcast lane __b of a 128-bit vector into every lane of
   a 64-bit result, via the __aarch64_vdup_laneq_* helper macros.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u64 (__a, __b);
}
15507 /* vdupq_lane */
15508 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15509 vdupq_lane_f32 (float32x2_t __a, const int __b)
15511 return __aarch64_vdupq_lane_f32 (__a, __b);
15514 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15515 vdupq_lane_f64 (float64x1_t __a, const int __b)
15517 return __aarch64_vdupq_lane_f64 (__a, __b);
15520 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15521 vdupq_lane_p8 (poly8x8_t __a, const int __b)
15523 return __aarch64_vdupq_lane_p8 (__a, __b);
15526 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15527 vdupq_lane_p16 (poly16x4_t __a, const int __b)
15529 return __aarch64_vdupq_lane_p16 (__a, __b);
15532 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15533 vdupq_lane_s8 (int8x8_t __a, const int __b)
15535 return __aarch64_vdupq_lane_s8 (__a, __b);
15538 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15539 vdupq_lane_s16 (int16x4_t __a, const int __b)
15541 return __aarch64_vdupq_lane_s16 (__a, __b);
15544 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15545 vdupq_lane_s32 (int32x2_t __a, const int __b)
15547 return __aarch64_vdupq_lane_s32 (__a, __b);
15550 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15551 vdupq_lane_s64 (int64x1_t __a, const int __b)
15553 return __aarch64_vdupq_lane_s64 (__a, __b);
15556 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15557 vdupq_lane_u8 (uint8x8_t __a, const int __b)
15559 return __aarch64_vdupq_lane_u8 (__a, __b);
15562 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15563 vdupq_lane_u16 (uint16x4_t __a, const int __b)
15565 return __aarch64_vdupq_lane_u16 (__a, __b);
15568 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15569 vdupq_lane_u32 (uint32x2_t __a, const int __b)
15571 return __aarch64_vdupq_lane_u32 (__a, __b);
15574 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15575 vdupq_lane_u64 (uint64x1_t __a, const int __b)
15577 return __aarch64_vdupq_lane_u64 (__a, __b);
15580 /* vdupq_laneq */
15581 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15582 vdupq_laneq_f32 (float32x4_t __a, const int __b)
15584 return __aarch64_vdupq_laneq_f32 (__a, __b);
15587 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15588 vdupq_laneq_f64 (float64x2_t __a, const int __b)
15590 return __aarch64_vdupq_laneq_f64 (__a, __b);
15593 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15594 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
15596 return __aarch64_vdupq_laneq_p8 (__a, __b);
15599 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15600 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
15602 return __aarch64_vdupq_laneq_p16 (__a, __b);
15605 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15606 vdupq_laneq_s8 (int8x16_t __a, const int __b)
15608 return __aarch64_vdupq_laneq_s8 (__a, __b);
15611 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15612 vdupq_laneq_s16 (int16x8_t __a, const int __b)
15614 return __aarch64_vdupq_laneq_s16 (__a, __b);
15617 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15618 vdupq_laneq_s32 (int32x4_t __a, const int __b)
15620 return __aarch64_vdupq_laneq_s32 (__a, __b);
15623 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15624 vdupq_laneq_s64 (int64x2_t __a, const int __b)
15626 return __aarch64_vdupq_laneq_s64 (__a, __b);
15629 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15630 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
15632 return __aarch64_vdupq_laneq_u8 (__a, __b);
15635 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15636 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
15638 return __aarch64_vdupq_laneq_u16 (__a, __b);
15641 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15642 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
15644 return __aarch64_vdupq_laneq_u32 (__a, __b);
15647 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15648 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
15650 return __aarch64_vdupq_laneq_u64 (__a, __b);
15653 /* vdupb_lane */
15654 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
15655 vdupb_lane_p8 (poly8x8_t __a, const int __b)
15657 return __aarch64_vget_lane_p8 (__a, __b);
15660 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15661 vdupb_lane_s8 (int8x8_t __a, const int __b)
15663 return __aarch64_vget_lane_s8 (__a, __b);
15666 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15667 vdupb_lane_u8 (uint8x8_t __a, const int __b)
15669 return __aarch64_vget_lane_u8 (__a, __b);
15672 /* vduph_lane */
15673 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
15674 vduph_lane_p16 (poly16x4_t __a, const int __b)
15676 return __aarch64_vget_lane_p16 (__a, __b);
15679 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15680 vduph_lane_s16 (int16x4_t __a, const int __b)
15682 return __aarch64_vget_lane_s16 (__a, __b);
15685 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15686 vduph_lane_u16 (uint16x4_t __a, const int __b)
15688 return __aarch64_vget_lane_u16 (__a, __b);
15691 /* vdups_lane */
15692 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15693 vdups_lane_f32 (float32x2_t __a, const int __b)
15695 return __aarch64_vget_lane_f32 (__a, __b);
15698 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15699 vdups_lane_s32 (int32x2_t __a, const int __b)
15701 return __aarch64_vget_lane_s32 (__a, __b);
15704 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15705 vdups_lane_u32 (uint32x2_t __a, const int __b)
15707 return __aarch64_vget_lane_u32 (__a, __b);
15710 /* vdupd_lane */
15711 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15712 vdupd_lane_f64 (float64x1_t __a, const int __b)
15714 __builtin_aarch64_im_lane_boundsi (__b, 1);
15715 return __a[0];
15718 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15719 vdupd_lane_s64 (int64x1_t __a, const int __b)
15721 __builtin_aarch64_im_lane_boundsi (__b, 1);
15722 return __a[0];
15725 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15726 vdupd_lane_u64 (uint64x1_t __a, const int __b)
15728 __builtin_aarch64_im_lane_boundsi (__b, 1);
15729 return __a[0];
15732 /* vdupb_laneq */
15733 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
15734 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
15736 return __aarch64_vgetq_lane_p8 (__a, __b);
15739 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15740 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
15742 return __aarch64_vgetq_lane_s8 (__a, __b);
15745 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15746 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
15748 return __aarch64_vgetq_lane_u8 (__a, __b);
15751 /* vduph_laneq */
15752 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
15753 vduph_laneq_p16 (poly16x8_t __a, const int __b)
15755 return __aarch64_vgetq_lane_p16 (__a, __b);
15758 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15759 vduph_laneq_s16 (int16x8_t __a, const int __b)
15761 return __aarch64_vgetq_lane_s16 (__a, __b);
15764 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15765 vduph_laneq_u16 (uint16x8_t __a, const int __b)
15767 return __aarch64_vgetq_lane_u16 (__a, __b);
15770 /* vdups_laneq */
15771 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15772 vdups_laneq_f32 (float32x4_t __a, const int __b)
15774 return __aarch64_vgetq_lane_f32 (__a, __b);
15777 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15778 vdups_laneq_s32 (int32x4_t __a, const int __b)
15780 return __aarch64_vgetq_lane_s32 (__a, __b);
15783 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15784 vdups_laneq_u32 (uint32x4_t __a, const int __b)
15786 return __aarch64_vgetq_lane_u32 (__a, __b);
15789 /* vdupd_laneq */
15790 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15791 vdupd_laneq_f64 (float64x2_t __a, const int __b)
15793 return __aarch64_vgetq_lane_f64 (__a, __b);
15796 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15797 vdupd_laneq_s64 (int64x2_t __a, const int __b)
15799 return __aarch64_vgetq_lane_s64 (__a, __b);
15802 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15803 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
15805 return __aarch64_vgetq_lane_u64 (__a, __b);
15808 /* vext */
15810 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15811 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
15813 __builtin_aarch64_im_lane_boundsi (__c, 2);
15814 #ifdef __AARCH64EB__
15815 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
15816 #else
15817 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
15818 #endif
15821 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15822 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
15824 /* The only possible index to the assembler instruction returns element 0. */
15825 __builtin_aarch64_im_lane_boundsi (__c, 1);
15826 return __a;
15828 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15829 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
15831 __builtin_aarch64_im_lane_boundsi (__c, 8);
15832 #ifdef __AARCH64EB__
15833 return __builtin_shuffle (__b, __a, (uint8x8_t)
15834 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15835 #else
15836 return __builtin_shuffle (__a, __b,
15837 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15838 #endif
15841 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15842 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
15844 __builtin_aarch64_im_lane_boundsi (__c, 4);
15845 #ifdef __AARCH64EB__
15846 return __builtin_shuffle (__b, __a,
15847 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15848 #else
15849 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
15850 #endif
15853 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15854 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
15856 __builtin_aarch64_im_lane_boundsi (__c, 8);
15857 #ifdef __AARCH64EB__
15858 return __builtin_shuffle (__b, __a, (uint8x8_t)
15859 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15860 #else
15861 return __builtin_shuffle (__a, __b,
15862 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15863 #endif
15866 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15867 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
15869 __builtin_aarch64_im_lane_boundsi (__c, 4);
15870 #ifdef __AARCH64EB__
15871 return __builtin_shuffle (__b, __a,
15872 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15873 #else
15874 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
15875 #endif
15878 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15879 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
15881 __builtin_aarch64_im_lane_boundsi (__c, 2);
15882 #ifdef __AARCH64EB__
15883 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
15884 #else
15885 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
15886 #endif
15889 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15890 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
15892 /* The only possible index to the assembler instruction returns element 0. */
15893 __builtin_aarch64_im_lane_boundsi (__c, 1);
15894 return __a;
15897 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15898 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
15900 __builtin_aarch64_im_lane_boundsi (__c, 8);
15901 #ifdef __AARCH64EB__
15902 return __builtin_shuffle (__b, __a, (uint8x8_t)
15903 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15904 #else
15905 return __builtin_shuffle (__a, __b,
15906 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15907 #endif
15910 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15911 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
15913 __builtin_aarch64_im_lane_boundsi (__c, 4);
15914 #ifdef __AARCH64EB__
15915 return __builtin_shuffle (__b, __a,
15916 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15917 #else
15918 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
15919 #endif
15922 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15923 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
15925 __builtin_aarch64_im_lane_boundsi (__c, 2);
15926 #ifdef __AARCH64EB__
15927 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
15928 #else
15929 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
15930 #endif
15933 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15934 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
15936 /* The only possible index to the assembler instruction returns element 0. */
15937 __builtin_aarch64_im_lane_boundsi (__c, 1);
15938 return __a;
15941 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15942 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
15944 __builtin_aarch64_im_lane_boundsi (__c, 4);
15945 #ifdef __AARCH64EB__
15946 return __builtin_shuffle (__b, __a,
15947 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15948 #else
15949 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
15950 #endif
15953 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15954 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
15956 __builtin_aarch64_im_lane_boundsi (__c, 2);
15957 #ifdef __AARCH64EB__
15958 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
15959 #else
15960 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
15961 #endif
15964 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15965 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
15967 __builtin_aarch64_im_lane_boundsi (__c, 16);
15968 #ifdef __AARCH64EB__
15969 return __builtin_shuffle (__b, __a, (uint8x16_t)
15970 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
15971 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
15972 #else
15973 return __builtin_shuffle (__a, __b, (uint8x16_t)
15974 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
15975 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
15976 #endif
15979 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15980 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
15982 __builtin_aarch64_im_lane_boundsi (__c, 8);
15983 #ifdef __AARCH64EB__
15984 return __builtin_shuffle (__b, __a, (uint16x8_t)
15985 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15986 #else
15987 return __builtin_shuffle (__a, __b,
15988 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15989 #endif
15992 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15993 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
15995 __builtin_aarch64_im_lane_boundsi (__c, 16);
15996 #ifdef __AARCH64EB__
15997 return __builtin_shuffle (__b, __a, (uint8x16_t)
15998 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
15999 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16000 #else
16001 return __builtin_shuffle (__a, __b, (uint8x16_t)
16002 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16003 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16004 #endif
16007 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16008 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
16010 __builtin_aarch64_im_lane_boundsi (__c, 8);
16011 #ifdef __AARCH64EB__
16012 return __builtin_shuffle (__b, __a, (uint16x8_t)
16013 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16014 #else
16015 return __builtin_shuffle (__a, __b,
16016 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16017 #endif
16020 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16021 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
16023 __builtin_aarch64_im_lane_boundsi (__c, 4);
16024 #ifdef __AARCH64EB__
16025 return __builtin_shuffle (__b, __a,
16026 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16027 #else
16028 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16029 #endif
16032 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16033 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
16035 __builtin_aarch64_im_lane_boundsi (__c, 2);
16036 #ifdef __AARCH64EB__
16037 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16038 #else
16039 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16040 #endif
16043 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16044 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
16046 __builtin_aarch64_im_lane_boundsi (__c, 16);
16047 #ifdef __AARCH64EB__
16048 return __builtin_shuffle (__b, __a, (uint8x16_t)
16049 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16050 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16051 #else
16052 return __builtin_shuffle (__a, __b, (uint8x16_t)
16053 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16054 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16055 #endif
16058 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16059 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
16061 __builtin_aarch64_im_lane_boundsi (__c, 8);
16062 #ifdef __AARCH64EB__
16063 return __builtin_shuffle (__b, __a, (uint16x8_t)
16064 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16065 #else
16066 return __builtin_shuffle (__a, __b,
16067 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16068 #endif
16071 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16072 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
16074 __builtin_aarch64_im_lane_boundsi (__c, 4);
16075 #ifdef __AARCH64EB__
16076 return __builtin_shuffle (__b, __a,
16077 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16078 #else
16079 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16080 #endif
16083 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16084 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
16086 __builtin_aarch64_im_lane_boundsi (__c, 2);
16087 #ifdef __AARCH64EB__
16088 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16089 #else
16090 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16091 #endif
16094 /* vfma */
16096 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16097 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16099 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16102 /* vfma_lane */
16104 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16105 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
16106 float32x2_t __c, const int __lane)
16108 return __builtin_aarch64_fmav2sf (__b,
16109 __aarch64_vdup_lane_f32 (__c, __lane),
16110 __a);
16113 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16114 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
16115 float64x1_t __c, const int __lane)
16117 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16120 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16121 vfmad_lane_f64 (float64_t __a, float64_t __b,
16122 float64x1_t __c, const int __lane)
16124 return __builtin_fma (__b, __c[0], __a);
16127 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16128 vfmas_lane_f32 (float32_t __a, float32_t __b,
16129 float32x2_t __c, const int __lane)
16131 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16134 /* vfma_laneq */
16136 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16137 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
16138 float32x4_t __c, const int __lane)
16140 return __builtin_aarch64_fmav2sf (__b,
16141 __aarch64_vdup_laneq_f32 (__c, __lane),
16142 __a);
16145 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16146 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
16147 float64x2_t __c, const int __lane)
16149 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16150 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
16153 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16154 vfmad_laneq_f64 (float64_t __a, float64_t __b,
16155 float64x2_t __c, const int __lane)
16157 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16160 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16161 vfmas_laneq_f32 (float32_t __a, float32_t __b,
16162 float32x4_t __c, const int __lane)
16164 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16167 /* vfmaq_lane */
16169 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16170 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
16171 float32x2_t __c, const int __lane)
16173 return __builtin_aarch64_fmav4sf (__b,
16174 __aarch64_vdupq_lane_f32 (__c, __lane),
16175 __a);
16178 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16179 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
16180 float64x1_t __c, const int __lane)
16182 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
16185 /* vfmaq_laneq */
16187 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16188 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16189 float32x4_t __c, const int __lane)
16191 return __builtin_aarch64_fmav4sf (__b,
16192 __aarch64_vdupq_laneq_f32 (__c, __lane),
16193 __a);
16196 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16197 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16198 float64x2_t __c, const int __lane)
16200 return __builtin_aarch64_fmav2df (__b,
16201 __aarch64_vdupq_laneq_f64 (__c, __lane),
16202 __a);
16205 /* vfms */
16207 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16208 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16210 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16213 /* vfms_lane */
16215 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16216 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
16217 float32x2_t __c, const int __lane)
16219 return __builtin_aarch64_fmav2sf (-__b,
16220 __aarch64_vdup_lane_f32 (__c, __lane),
16221 __a);
16224 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16225 vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
16226 float64x1_t __c, const int __lane)
16228 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16231 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16232 vfmsd_lane_f64 (float64_t __a, float64_t __b,
16233 float64x1_t __c, const int __lane)
16235 return __builtin_fma (-__b, __c[0], __a);
16238 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16239 vfmss_lane_f32 (float32_t __a, float32_t __b,
16240 float32x2_t __c, const int __lane)
16242 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16245 /* vfms_laneq */
16247 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16248 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
16249 float32x4_t __c, const int __lane)
16251 return __builtin_aarch64_fmav2sf (-__b,
16252 __aarch64_vdup_laneq_f32 (__c, __lane),
16253 __a);
16256 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16257 vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
16258 float64x2_t __c, const int __lane)
16260 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16261 return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
16264 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16265 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
16266 float64x2_t __c, const int __lane)
16268 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16271 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16272 vfmss_laneq_f32 (float32_t __a, float32_t __b,
16273 float32x4_t __c, const int __lane)
16275 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16278 /* vfmsq_lane */
16280 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16281 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
16282 float32x2_t __c, const int __lane)
16284 return __builtin_aarch64_fmav4sf (-__b,
16285 __aarch64_vdupq_lane_f32 (__c, __lane),
16286 __a);
16289 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16290 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
16291 float64x1_t __c, const int __lane)
16293 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
16296 /* vfmsq_laneq */
16298 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16299 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16300 float32x4_t __c, const int __lane)
16302 return __builtin_aarch64_fmav4sf (-__b,
16303 __aarch64_vdupq_laneq_f32 (__c, __lane),
16304 __a);
16307 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16308 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16309 float64x2_t __c, const int __lane)
16311 return __builtin_aarch64_fmav2df (-__b,
16312 __aarch64_vdupq_laneq_f64 (__c, __lane),
16313 __a);
16316 /* vld1 */
16318 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16319 vld1_f32 (const float32_t *a)
16321 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
16324 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16325 vld1_f64 (const float64_t *a)
16327 return (float64x1_t) {*a};
16330 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16331 vld1_p8 (const poly8_t *a)
16333 return (poly8x8_t)
16334 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16337 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16338 vld1_p16 (const poly16_t *a)
16340 return (poly16x4_t)
16341 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16344 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16345 vld1_s8 (const int8_t *a)
16347 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16350 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16351 vld1_s16 (const int16_t *a)
16353 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16356 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16357 vld1_s32 (const int32_t *a)
16359 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
16362 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16363 vld1_s64 (const int64_t *a)
16365 return (int64x1_t) {*a};
16368 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16369 vld1_u8 (const uint8_t *a)
16371 return (uint8x8_t)
16372 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16375 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16376 vld1_u16 (const uint16_t *a)
16378 return (uint16x4_t)
16379 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16382 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16383 vld1_u32 (const uint32_t *a)
16385 return (uint32x2_t)
16386 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
16389 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16390 vld1_u64 (const uint64_t *a)
16392 return (uint64x1_t) {*a};
16395 /* vld1q */
16397 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16398 vld1q_f32 (const float32_t *a)
16400 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
16403 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16404 vld1q_f64 (const float64_t *a)
16406 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
16409 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16410 vld1q_p8 (const poly8_t *a)
16412 return (poly8x16_t)
16413 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
16416 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16417 vld1q_p16 (const poly16_t *a)
16419 return (poly16x8_t)
16420 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
16423 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16424 vld1q_s8 (const int8_t *a)
16426 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
16429 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16430 vld1q_s16 (const int16_t *a)
16432 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
16435 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16436 vld1q_s32 (const int32_t *a)
16438 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
16441 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16442 vld1q_s64 (const int64_t *a)
16444 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
16447 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16448 vld1q_u8 (const uint8_t *a)
16450 return (uint8x16_t)
16451 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
16454 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16455 vld1q_u16 (const uint16_t *a)
16457 return (uint16x8_t)
16458 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
16461 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16462 vld1q_u32 (const uint32_t *a)
16464 return (uint32x4_t)
16465 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
16468 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16469 vld1q_u64 (const uint64_t *a)
16471 return (uint64x2_t)
16472 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
16475 /* vldn */
16477 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
16478 vld2_s64 (const int64_t * __a)
16480 int64x1x2_t ret;
16481 __builtin_aarch64_simd_oi __o;
16482 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
16483 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16484 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16485 return ret;
16488 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
16489 vld2_u64 (const uint64_t * __a)
16491 uint64x1x2_t ret;
16492 __builtin_aarch64_simd_oi __o;
16493 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
16494 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16495 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16496 return ret;
16499 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
16500 vld2_f64 (const float64_t * __a)
16502 float64x1x2_t ret;
16503 __builtin_aarch64_simd_oi __o;
16504 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
16505 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
16506 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
16507 return ret;
16510 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
16511 vld2_s8 (const int8_t * __a)
16513 int8x8x2_t ret;
16514 __builtin_aarch64_simd_oi __o;
16515 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
16516 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16517 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16518 return ret;
16521 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
16522 vld2_p8 (const poly8_t * __a)
16524 poly8x8x2_t ret;
16525 __builtin_aarch64_simd_oi __o;
16526 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
16527 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16528 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16529 return ret;
16532 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
16533 vld2_s16 (const int16_t * __a)
16535 int16x4x2_t ret;
16536 __builtin_aarch64_simd_oi __o;
16537 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
16538 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16539 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16540 return ret;
16543 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
16544 vld2_p16 (const poly16_t * __a)
16546 poly16x4x2_t ret;
16547 __builtin_aarch64_simd_oi __o;
16548 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
16549 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16550 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16551 return ret;
16554 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
16555 vld2_s32 (const int32_t * __a)
16557 int32x2x2_t ret;
16558 __builtin_aarch64_simd_oi __o;
16559 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
16560 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16561 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16562 return ret;
16565 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
16566 vld2_u8 (const uint8_t * __a)
16568 uint8x8x2_t ret;
16569 __builtin_aarch64_simd_oi __o;
16570 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
16571 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16572 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16573 return ret;
16576 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
16577 vld2_u16 (const uint16_t * __a)
16579 uint16x4x2_t ret;
16580 __builtin_aarch64_simd_oi __o;
16581 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
16582 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16583 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16584 return ret;
16587 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
16588 vld2_u32 (const uint32_t * __a)
16590 uint32x2x2_t ret;
16591 __builtin_aarch64_simd_oi __o;
16592 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
16593 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16594 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16595 return ret;
16598 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
16599 vld2_f32 (const float32_t * __a)
16601 float32x2x2_t ret;
16602 __builtin_aarch64_simd_oi __o;
16603 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
16604 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
16605 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
16606 return ret;
16609 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
16610 vld2q_s8 (const int8_t * __a)
16612 int8x16x2_t ret;
16613 __builtin_aarch64_simd_oi __o;
16614 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
16615 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16616 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16617 return ret;
16620 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
16621 vld2q_p8 (const poly8_t * __a)
16623 poly8x16x2_t ret;
16624 __builtin_aarch64_simd_oi __o;
16625 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
16626 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16627 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16628 return ret;
16631 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
16632 vld2q_s16 (const int16_t * __a)
16634 int16x8x2_t ret;
16635 __builtin_aarch64_simd_oi __o;
16636 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
16637 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16638 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16639 return ret;
16642 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
16643 vld2q_p16 (const poly16_t * __a)
16645 poly16x8x2_t ret;
16646 __builtin_aarch64_simd_oi __o;
16647 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
16648 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16649 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16650 return ret;
16653 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
16654 vld2q_s32 (const int32_t * __a)
16656 int32x4x2_t ret;
16657 __builtin_aarch64_simd_oi __o;
16658 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
16659 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16660 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16661 return ret;
16664 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
16665 vld2q_s64 (const int64_t * __a)
16667 int64x2x2_t ret;
16668 __builtin_aarch64_simd_oi __o;
16669 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
16670 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16671 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16672 return ret;
16675 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
16676 vld2q_u8 (const uint8_t * __a)
16678 uint8x16x2_t ret;
16679 __builtin_aarch64_simd_oi __o;
16680 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
16681 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16682 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16683 return ret;
16686 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
16687 vld2q_u16 (const uint16_t * __a)
16689 uint16x8x2_t ret;
16690 __builtin_aarch64_simd_oi __o;
16691 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
16692 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16693 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16694 return ret;
16697 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
16698 vld2q_u32 (const uint32_t * __a)
16700 uint32x4x2_t ret;
16701 __builtin_aarch64_simd_oi __o;
16702 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
16703 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16704 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16705 return ret;
16708 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
16709 vld2q_u64 (const uint64_t * __a)
16711 uint64x2x2_t ret;
16712 __builtin_aarch64_simd_oi __o;
16713 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
16714 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16715 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16716 return ret;
16719 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
16720 vld2q_f32 (const float32_t * __a)
16722 float32x4x2_t ret;
16723 __builtin_aarch64_simd_oi __o;
16724 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
16725 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16726 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16727 return ret;
16730 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
16731 vld2q_f64 (const float64_t * __a)
16733 float64x2x2_t ret;
16734 __builtin_aarch64_simd_oi __o;
16735 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
16736 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16737 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16738 return ret;
16741 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
16742 vld3_s64 (const int64_t * __a)
16744 int64x1x3_t ret;
16745 __builtin_aarch64_simd_ci __o;
16746 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
16747 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16748 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16749 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16750 return ret;
16753 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
16754 vld3_u64 (const uint64_t * __a)
16756 uint64x1x3_t ret;
16757 __builtin_aarch64_simd_ci __o;
16758 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
16759 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16760 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16761 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16762 return ret;
16765 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
16766 vld3_f64 (const float64_t * __a)
16768 float64x1x3_t ret;
16769 __builtin_aarch64_simd_ci __o;
16770 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
16771 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
16772 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
16773 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
16774 return ret;
16777 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
16778 vld3_s8 (const int8_t * __a)
16780 int8x8x3_t ret;
16781 __builtin_aarch64_simd_ci __o;
16782 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16783 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16784 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16785 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16786 return ret;
16789 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
16790 vld3_p8 (const poly8_t * __a)
16792 poly8x8x3_t ret;
16793 __builtin_aarch64_simd_ci __o;
16794 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16795 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16796 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16797 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16798 return ret;
16801 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
16802 vld3_s16 (const int16_t * __a)
16804 int16x4x3_t ret;
16805 __builtin_aarch64_simd_ci __o;
16806 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16807 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16808 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16809 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16810 return ret;
16813 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
16814 vld3_p16 (const poly16_t * __a)
16816 poly16x4x3_t ret;
16817 __builtin_aarch64_simd_ci __o;
16818 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16819 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16820 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16821 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16822 return ret;
16825 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
16826 vld3_s32 (const int32_t * __a)
16828 int32x2x3_t ret;
16829 __builtin_aarch64_simd_ci __o;
16830 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
16831 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16832 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16833 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16834 return ret;
16837 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
16838 vld3_u8 (const uint8_t * __a)
16840 uint8x8x3_t ret;
16841 __builtin_aarch64_simd_ci __o;
16842 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16843 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16844 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16845 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16846 return ret;
16849 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
16850 vld3_u16 (const uint16_t * __a)
16852 uint16x4x3_t ret;
16853 __builtin_aarch64_simd_ci __o;
16854 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16855 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16856 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16857 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16858 return ret;
16861 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
16862 vld3_u32 (const uint32_t * __a)
16864 uint32x2x3_t ret;
16865 __builtin_aarch64_simd_ci __o;
16866 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
16867 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16868 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16869 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16870 return ret;
16873 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
16874 vld3_f32 (const float32_t * __a)
16876 float32x2x3_t ret;
16877 __builtin_aarch64_simd_ci __o;
16878 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
16879 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
16880 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
16881 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
16882 return ret;
16885 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
16886 vld3q_s8 (const int8_t * __a)
16888 int8x16x3_t ret;
16889 __builtin_aarch64_simd_ci __o;
16890 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16891 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16892 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16893 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16894 return ret;
16897 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
16898 vld3q_p8 (const poly8_t * __a)
16900 poly8x16x3_t ret;
16901 __builtin_aarch64_simd_ci __o;
16902 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16903 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16904 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16905 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16906 return ret;
16909 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
16910 vld3q_s16 (const int16_t * __a)
16912 int16x8x3_t ret;
16913 __builtin_aarch64_simd_ci __o;
16914 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16915 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16916 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16917 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16918 return ret;
16921 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
16922 vld3q_p16 (const poly16_t * __a)
16924 poly16x8x3_t ret;
16925 __builtin_aarch64_simd_ci __o;
16926 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16927 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16928 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16929 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16930 return ret;
16933 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
16934 vld3q_s32 (const int32_t * __a)
16936 int32x4x3_t ret;
16937 __builtin_aarch64_simd_ci __o;
16938 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
16939 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16940 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16941 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16942 return ret;
16945 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
16946 vld3q_s64 (const int64_t * __a)
16948 int64x2x3_t ret;
16949 __builtin_aarch64_simd_ci __o;
16950 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
16951 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16952 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16953 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16954 return ret;
16957 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
16958 vld3q_u8 (const uint8_t * __a)
16960 uint8x16x3_t ret;
16961 __builtin_aarch64_simd_ci __o;
16962 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16963 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16964 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16965 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16966 return ret;
16969 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
16970 vld3q_u16 (const uint16_t * __a)
16972 uint16x8x3_t ret;
16973 __builtin_aarch64_simd_ci __o;
16974 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16975 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16976 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16977 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16978 return ret;
16981 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
16982 vld3q_u32 (const uint32_t * __a)
16984 uint32x4x3_t ret;
16985 __builtin_aarch64_simd_ci __o;
16986 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
16987 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16988 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16989 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16990 return ret;
16993 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
16994 vld3q_u64 (const uint64_t * __a)
16996 uint64x2x3_t ret;
16997 __builtin_aarch64_simd_ci __o;
16998 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
16999 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17000 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17001 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17002 return ret;
17005 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
17006 vld3q_f32 (const float32_t * __a)
17008 float32x4x3_t ret;
17009 __builtin_aarch64_simd_ci __o;
17010 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
17011 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
17012 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
17013 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
17014 return ret;
17017 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
17018 vld3q_f64 (const float64_t * __a)
17020 float64x2x3_t ret;
17021 __builtin_aarch64_simd_ci __o;
17022 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
17023 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
17024 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
17025 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
17026 return ret;
17029 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
17030 vld4_s64 (const int64_t * __a)
17032 int64x1x4_t ret;
17033 __builtin_aarch64_simd_xi __o;
17034 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17035 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17036 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17037 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17038 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17039 return ret;
17042 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
17043 vld4_u64 (const uint64_t * __a)
17045 uint64x1x4_t ret;
17046 __builtin_aarch64_simd_xi __o;
17047 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17048 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17049 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17050 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17051 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17052 return ret;
17055 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
17056 vld4_f64 (const float64_t * __a)
17058 float64x1x4_t ret;
17059 __builtin_aarch64_simd_xi __o;
17060 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
17061 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
17062 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
17063 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
17064 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
17065 return ret;
17068 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
17069 vld4_s8 (const int8_t * __a)
17071 int8x8x4_t ret;
17072 __builtin_aarch64_simd_xi __o;
17073 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17074 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17075 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17076 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17077 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17078 return ret;
17081 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
17082 vld4_p8 (const poly8_t * __a)
17084 poly8x8x4_t ret;
17085 __builtin_aarch64_simd_xi __o;
17086 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17087 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17088 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17089 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17090 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17091 return ret;
17094 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
17095 vld4_s16 (const int16_t * __a)
17097 int16x4x4_t ret;
17098 __builtin_aarch64_simd_xi __o;
17099 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17100 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17101 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17102 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17103 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17104 return ret;
17107 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
17108 vld4_p16 (const poly16_t * __a)
17110 poly16x4x4_t ret;
17111 __builtin_aarch64_simd_xi __o;
17112 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17113 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17114 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17115 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17116 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17117 return ret;
17120 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
17121 vld4_s32 (const int32_t * __a)
17123 int32x2x4_t ret;
17124 __builtin_aarch64_simd_xi __o;
17125 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17126 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17127 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17128 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17129 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17130 return ret;
17133 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
17134 vld4_u8 (const uint8_t * __a)
17136 uint8x8x4_t ret;
17137 __builtin_aarch64_simd_xi __o;
17138 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17139 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17140 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17141 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17142 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17143 return ret;
17146 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
17147 vld4_u16 (const uint16_t * __a)
17149 uint16x4x4_t ret;
17150 __builtin_aarch64_simd_xi __o;
17151 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17152 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17153 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17154 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17155 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17156 return ret;
17159 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
17160 vld4_u32 (const uint32_t * __a)
17162 uint32x2x4_t ret;
17163 __builtin_aarch64_simd_xi __o;
17164 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17165 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17166 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17167 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17168 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17169 return ret;
17172 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
17173 vld4_f32 (const float32_t * __a)
17175 float32x2x4_t ret;
17176 __builtin_aarch64_simd_xi __o;
17177 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
17178 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
17179 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
17180 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
17181 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
17182 return ret;
17185 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
17186 vld4q_s8 (const int8_t * __a)
17188 int8x16x4_t ret;
17189 __builtin_aarch64_simd_xi __o;
17190 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17191 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17192 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17193 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17194 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17195 return ret;
17198 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
17199 vld4q_p8 (const poly8_t * __a)
17201 poly8x16x4_t ret;
17202 __builtin_aarch64_simd_xi __o;
17203 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17204 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17205 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17206 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17207 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17208 return ret;
17211 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
17212 vld4q_s16 (const int16_t * __a)
17214 int16x8x4_t ret;
17215 __builtin_aarch64_simd_xi __o;
17216 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17217 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17218 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17219 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17220 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17221 return ret;
17224 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
17225 vld4q_p16 (const poly16_t * __a)
17227 poly16x8x4_t ret;
17228 __builtin_aarch64_simd_xi __o;
17229 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17230 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17231 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17232 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17233 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17234 return ret;
17237 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
17238 vld4q_s32 (const int32_t * __a)
17240 int32x4x4_t ret;
17241 __builtin_aarch64_simd_xi __o;
17242 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17243 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17244 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17245 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17246 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17247 return ret;
17250 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
17251 vld4q_s64 (const int64_t * __a)
17253 int64x2x4_t ret;
17254 __builtin_aarch64_simd_xi __o;
17255 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17256 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17257 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17258 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17259 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17260 return ret;
17263 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
17264 vld4q_u8 (const uint8_t * __a)
17266 uint8x16x4_t ret;
17267 __builtin_aarch64_simd_xi __o;
17268 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17269 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17270 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17271 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17272 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17273 return ret;
17276 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
17277 vld4q_u16 (const uint16_t * __a)
17279 uint16x8x4_t ret;
17280 __builtin_aarch64_simd_xi __o;
17281 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17282 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17283 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17284 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17285 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17286 return ret;
17289 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
17290 vld4q_u32 (const uint32_t * __a)
17292 uint32x4x4_t ret;
17293 __builtin_aarch64_simd_xi __o;
17294 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17295 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17296 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17297 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17298 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17299 return ret;
17302 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
17303 vld4q_u64 (const uint64_t * __a)
17305 uint64x2x4_t ret;
17306 __builtin_aarch64_simd_xi __o;
17307 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17308 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17309 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17310 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17311 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17312 return ret;
17315 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
17316 vld4q_f32 (const float32_t * __a)
17318 float32x4x4_t ret;
17319 __builtin_aarch64_simd_xi __o;
17320 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
17321 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
17322 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
17323 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
17324 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
17325 return ret;
17328 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
17329 vld4q_f64 (const float64_t * __a)
17331 float64x2x4_t ret;
17332 __builtin_aarch64_simd_xi __o;
17333 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
17334 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
17335 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
17336 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
17337 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
17338 return ret;
/* vldn_dup: load one N-element structure and replicate it to all lanes.  */
17343 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
17344 vld2_dup_s8 (const int8_t * __a)
17346 int8x8x2_t ret;
17347 __builtin_aarch64_simd_oi __o;
17348 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17349 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17350 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17351 return ret;
17354 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
17355 vld2_dup_s16 (const int16_t * __a)
17357 int16x4x2_t ret;
17358 __builtin_aarch64_simd_oi __o;
17359 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17360 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17361 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17362 return ret;
17365 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
17366 vld2_dup_s32 (const int32_t * __a)
17368 int32x2x2_t ret;
17369 __builtin_aarch64_simd_oi __o;
17370 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
17371 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17372 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17373 return ret;
17376 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
17377 vld2_dup_f32 (const float32_t * __a)
17379 float32x2x2_t ret;
17380 __builtin_aarch64_simd_oi __o;
17381 __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
17382 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
17383 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
17384 return ret;
17387 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
17388 vld2_dup_f64 (const float64_t * __a)
17390 float64x1x2_t ret;
17391 __builtin_aarch64_simd_oi __o;
17392 __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
17393 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
17394 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
17395 return ret;
17398 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
17399 vld2_dup_u8 (const uint8_t * __a)
17401 uint8x8x2_t ret;
17402 __builtin_aarch64_simd_oi __o;
17403 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17404 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17405 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17406 return ret;
17409 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
17410 vld2_dup_u16 (const uint16_t * __a)
17412 uint16x4x2_t ret;
17413 __builtin_aarch64_simd_oi __o;
17414 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17415 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17416 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17417 return ret;
17420 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
17421 vld2_dup_u32 (const uint32_t * __a)
17423 uint32x2x2_t ret;
17424 __builtin_aarch64_simd_oi __o;
17425 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
17426 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17427 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17428 return ret;
17431 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
17432 vld2_dup_p8 (const poly8_t * __a)
17434 poly8x8x2_t ret;
17435 __builtin_aarch64_simd_oi __o;
17436 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17437 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17438 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17439 return ret;
17442 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
17443 vld2_dup_p16 (const poly16_t * __a)
17445 poly16x4x2_t ret;
17446 __builtin_aarch64_simd_oi __o;
17447 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17448 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17449 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17450 return ret;
17453 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
17454 vld2_dup_s64 (const int64_t * __a)
17456 int64x1x2_t ret;
17457 __builtin_aarch64_simd_oi __o;
17458 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
17459 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17460 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17461 return ret;
17464 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
17465 vld2_dup_u64 (const uint64_t * __a)
17467 uint64x1x2_t ret;
17468 __builtin_aarch64_simd_oi __o;
17469 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
17470 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17471 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17472 return ret;
17475 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
17476 vld2q_dup_s8 (const int8_t * __a)
17478 int8x16x2_t ret;
17479 __builtin_aarch64_simd_oi __o;
17480 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17481 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17482 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17483 return ret;
17486 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
17487 vld2q_dup_p8 (const poly8_t * __a)
17489 poly8x16x2_t ret;
17490 __builtin_aarch64_simd_oi __o;
17491 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17492 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17493 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17494 return ret;
17497 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
17498 vld2q_dup_s16 (const int16_t * __a)
17500 int16x8x2_t ret;
17501 __builtin_aarch64_simd_oi __o;
17502 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17503 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17504 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17505 return ret;
17508 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
17509 vld2q_dup_p16 (const poly16_t * __a)
17511 poly16x8x2_t ret;
17512 __builtin_aarch64_simd_oi __o;
17513 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17514 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17515 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17516 return ret;
17519 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
17520 vld2q_dup_s32 (const int32_t * __a)
17522 int32x4x2_t ret;
17523 __builtin_aarch64_simd_oi __o;
17524 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
17525 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17526 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17527 return ret;
17530 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
17531 vld2q_dup_s64 (const int64_t * __a)
17533 int64x2x2_t ret;
17534 __builtin_aarch64_simd_oi __o;
17535 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
17536 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17537 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17538 return ret;
17541 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
17542 vld2q_dup_u8 (const uint8_t * __a)
17544 uint8x16x2_t ret;
17545 __builtin_aarch64_simd_oi __o;
17546 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17547 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17548 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17549 return ret;
17552 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
17553 vld2q_dup_u16 (const uint16_t * __a)
17555 uint16x8x2_t ret;
17556 __builtin_aarch64_simd_oi __o;
17557 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17558 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17559 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17560 return ret;
17563 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
17564 vld2q_dup_u32 (const uint32_t * __a)
17566 uint32x4x2_t ret;
17567 __builtin_aarch64_simd_oi __o;
17568 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
17569 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17570 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17571 return ret;
17574 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
17575 vld2q_dup_u64 (const uint64_t * __a)
17577 uint64x2x2_t ret;
17578 __builtin_aarch64_simd_oi __o;
17579 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
17580 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17581 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17582 return ret;
17585 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
17586 vld2q_dup_f32 (const float32_t * __a)
17588 float32x4x2_t ret;
17589 __builtin_aarch64_simd_oi __o;
17590 __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
17591 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
17592 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
17593 return ret;
17596 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
17597 vld2q_dup_f64 (const float64_t * __a)
17599 float64x2x2_t ret;
17600 __builtin_aarch64_simd_oi __o;
17601 __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
17602 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
17603 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
17604 return ret;
17607 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
17608 vld3_dup_s64 (const int64_t * __a)
17610 int64x1x3_t ret;
17611 __builtin_aarch64_simd_ci __o;
17612 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
17613 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17614 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17615 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17616 return ret;
17619 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
17620 vld3_dup_u64 (const uint64_t * __a)
17622 uint64x1x3_t ret;
17623 __builtin_aarch64_simd_ci __o;
17624 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
17625 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17626 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17627 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17628 return ret;
17631 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
17632 vld3_dup_f64 (const float64_t * __a)
17634 float64x1x3_t ret;
17635 __builtin_aarch64_simd_ci __o;
17636 __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
17637 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
17638 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
17639 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
17640 return ret;
17643 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
17644 vld3_dup_s8 (const int8_t * __a)
17646 int8x8x3_t ret;
17647 __builtin_aarch64_simd_ci __o;
17648 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17649 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17650 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17651 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17652 return ret;
17655 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
17656 vld3_dup_p8 (const poly8_t * __a)
17658 poly8x8x3_t ret;
17659 __builtin_aarch64_simd_ci __o;
17660 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17661 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17662 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17663 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17664 return ret;
17667 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
17668 vld3_dup_s16 (const int16_t * __a)
17670 int16x4x3_t ret;
17671 __builtin_aarch64_simd_ci __o;
17672 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17673 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17674 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17675 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17676 return ret;
17679 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
17680 vld3_dup_p16 (const poly16_t * __a)
17682 poly16x4x3_t ret;
17683 __builtin_aarch64_simd_ci __o;
17684 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17685 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17686 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17687 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17688 return ret;
17691 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
17692 vld3_dup_s32 (const int32_t * __a)
17694 int32x2x3_t ret;
17695 __builtin_aarch64_simd_ci __o;
17696 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
17697 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17698 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17699 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17700 return ret;
17703 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
17704 vld3_dup_u8 (const uint8_t * __a)
17706 uint8x8x3_t ret;
17707 __builtin_aarch64_simd_ci __o;
17708 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17709 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17710 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17711 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17712 return ret;
17715 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
17716 vld3_dup_u16 (const uint16_t * __a)
17718 uint16x4x3_t ret;
17719 __builtin_aarch64_simd_ci __o;
17720 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17721 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17722 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17723 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17724 return ret;
17727 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
17728 vld3_dup_u32 (const uint32_t * __a)
17730 uint32x2x3_t ret;
17731 __builtin_aarch64_simd_ci __o;
17732 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
17733 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17734 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17735 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17736 return ret;
17739 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
17740 vld3_dup_f32 (const float32_t * __a)
17742 float32x2x3_t ret;
17743 __builtin_aarch64_simd_ci __o;
17744 __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
17745 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
17746 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
17747 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
17748 return ret;
17751 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
17752 vld3q_dup_s8 (const int8_t * __a)
17754 int8x16x3_t ret;
17755 __builtin_aarch64_simd_ci __o;
17756 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17757 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17758 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17759 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17760 return ret;
17763 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
17764 vld3q_dup_p8 (const poly8_t * __a)
17766 poly8x16x3_t ret;
17767 __builtin_aarch64_simd_ci __o;
17768 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17769 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17770 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17771 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17772 return ret;
17775 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
17776 vld3q_dup_s16 (const int16_t * __a)
17778 int16x8x3_t ret;
17779 __builtin_aarch64_simd_ci __o;
17780 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17781 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17782 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17783 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17784 return ret;
17787 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
17788 vld3q_dup_p16 (const poly16_t * __a)
17790 poly16x8x3_t ret;
17791 __builtin_aarch64_simd_ci __o;
17792 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17793 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17794 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17795 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17796 return ret;
17799 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
17800 vld3q_dup_s32 (const int32_t * __a)
17802 int32x4x3_t ret;
17803 __builtin_aarch64_simd_ci __o;
17804 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
17805 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17806 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17807 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17808 return ret;
17811 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
17812 vld3q_dup_s64 (const int64_t * __a)
17814 int64x2x3_t ret;
17815 __builtin_aarch64_simd_ci __o;
17816 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
17817 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17818 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17819 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17820 return ret;
17823 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
17824 vld3q_dup_u8 (const uint8_t * __a)
17826 uint8x16x3_t ret;
17827 __builtin_aarch64_simd_ci __o;
17828 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17829 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17830 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17831 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17832 return ret;
17835 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
17836 vld3q_dup_u16 (const uint16_t * __a)
17838 uint16x8x3_t ret;
17839 __builtin_aarch64_simd_ci __o;
17840 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17841 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17842 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17843 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17844 return ret;
17847 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
17848 vld3q_dup_u32 (const uint32_t * __a)
17850 uint32x4x3_t ret;
17851 __builtin_aarch64_simd_ci __o;
17852 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
17853 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17854 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17855 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17856 return ret;
17859 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
17860 vld3q_dup_u64 (const uint64_t * __a)
17862 uint64x2x3_t ret;
17863 __builtin_aarch64_simd_ci __o;
17864 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
17865 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17866 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17867 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17868 return ret;
17871 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
17872 vld3q_dup_f32 (const float32_t * __a)
17874 float32x4x3_t ret;
17875 __builtin_aarch64_simd_ci __o;
17876 __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
17877 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
17878 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
17879 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
17880 return ret;
17883 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
17884 vld3q_dup_f64 (const float64_t * __a)
17886 float64x2x3_t ret;
17887 __builtin_aarch64_simd_ci __o;
17888 __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
17889 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
17890 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
17891 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
17892 return ret;
17895 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
17896 vld4_dup_s64 (const int64_t * __a)
17898 int64x1x4_t ret;
17899 __builtin_aarch64_simd_xi __o;
17900 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
17901 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17902 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17903 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17904 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17905 return ret;
17908 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
17909 vld4_dup_u64 (const uint64_t * __a)
17911 uint64x1x4_t ret;
17912 __builtin_aarch64_simd_xi __o;
17913 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
17914 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17915 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17916 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17917 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17918 return ret;
17921 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
17922 vld4_dup_f64 (const float64_t * __a)
17924 float64x1x4_t ret;
17925 __builtin_aarch64_simd_xi __o;
17926 __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
17927 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
17928 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
17929 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
17930 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
17931 return ret;
17934 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
17935 vld4_dup_s8 (const int8_t * __a)
17937 int8x8x4_t ret;
17938 __builtin_aarch64_simd_xi __o;
17939 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17940 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17941 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17942 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17943 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17944 return ret;
17947 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
17948 vld4_dup_p8 (const poly8_t * __a)
17950 poly8x8x4_t ret;
17951 __builtin_aarch64_simd_xi __o;
17952 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17953 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17954 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17955 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17956 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17957 return ret;
/* vld4_dup (D-register forms): load one element per structure from __a
   and replicate it across every lane of each of the four result vectors.
   The ld4r builtin returns an XI value (a tuple of four SIMD registers);
   the individual D registers are then extracted and cast to the
   intrinsic's element type.  Polynomial and unsigned variants reuse the
   builtin of the same-width signed mode — only the casts differ.  */

__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_dup_s16 (const int16_t * __a)
{
  int16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_dup_p16 (const poly16_t * __a)
{
  poly16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_s32 (const int32_t * __a)
{
  int32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_dup_u8 (const uint8_t * __a)
{
  uint8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
vld4_dup_u16 (const uint16_t * __a)
{
  uint16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_u32 (const uint32_t * __a)
{
  uint32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
  float32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
  return ret;
}
/* vld4q_dup (Q-register forms): as for the vld4_dup D-register variants,
   but the ld4r builtin operates on full 128-bit modes and the results
   are extracted with the Q-register accessors.  Polynomial and unsigned
   variants reuse the same-width signed-mode builtin; only the casts
   differ.  */

__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_dup_s8 (const int8_t * __a)
{
  int8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
vld4q_dup_p8 (const poly8_t * __a)
{
  poly8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
vld4q_dup_s16 (const int16_t * __a)
{
  int16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
vld4q_dup_p16 (const poly16_t * __a)
{
  poly16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_s32 (const int32_t * __a)
{
  int32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
vld4q_dup_s64 (const int64_t * __a)
{
  int64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
vld4q_dup_u8 (const uint8_t * __a)
{
  uint8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
vld4q_dup_u16 (const uint16_t * __a)
{
  uint16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_u32 (const uint32_t * __a)
{
  uint32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
vld4q_dup_u64 (const uint64_t * __a)
{
  uint64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_f32 (const float32_t * __a)
{
  float32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
  return ret;
}

__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
vld4q_dup_f64 (const float64_t * __a)
{
  float64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
  return ret;
}
/* vld2_lane (D-register forms).

   There is no D-register lane-load builtin, so each 64-bit input vector
   is first widened to 128 bits by combining it with a zero high half
   (vcreate (0)), the widened pair is packed into an OI register tuple,
   the Q-register lane-load builtin is invoked, and finally the low
   64 bits of each result register are extracted again.  The
   `signedtype' argument is the signed Q-vector type expected by the
   register-set builtin; unsigned/poly variants cast through it.  */

#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
			mode, ptrmode, funcsuffix, signedtype)		   \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
{									   \
  __builtin_aarch64_simd_oi __o;					   \
  largetype __temp;							   \
  __temp.val[0] =							   \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
  __temp.val[1] =							   \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
  __o = __builtin_aarch64_set_qregoi##mode (__o,			   \
					    (signedtype) __temp.val[0],	   \
					    0);				   \
  __o = __builtin_aarch64_set_qregoi##mode (__o,			   \
					    (signedtype) __temp.val[1],	   \
					    1);				   \
  __o = __builtin_aarch64_ld2_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0);	   \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1);	   \
  return __b;								   \
}

/* Arguments: result struct type, D-vector type, corresponding Q-struct
   type, element type, Q mode, pointer mode, intrinsic suffix, signed
   Q-vector type used for the register-set casts.  */
__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, v2di, di,
		 u64, int64x2_t)

#undef __LD2_LANE_FUNC
/* vld2q_lane (Q-register forms).

   The inputs are already 128 bits wide, so no widening is needed: the
   two Q registers are packed into an OI tuple (via the common v4si
   setter — hence the int32x4_t casts), the lane load is performed, and
   the two updated registers are extracted and cast back.  */

#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{									   \
  __builtin_aarch64_simd_oi __o;					   \
  intype ret;								   \
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_ld2_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0);	   \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1);	   \
  return ret;								   \
}

__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD2_LANE_FUNC
/* vld3_lane (D-register forms).

   Same scheme as vld2_lane above, extended to three registers: widen
   each 64-bit input to a Q register with a zeroed high half, pack the
   three into a CI tuple, run the Q-register lane-load builtin, then
   extract the low 64 bits of each result register.  */

#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
			mode, ptrmode, funcsuffix, signedtype)		   \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
{									   \
  __builtin_aarch64_simd_ci __o;					   \
  largetype __temp;							   \
  __temp.val[0] =							   \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
  __temp.val[1] =							   \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
  __temp.val[2] =							   \
    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[0],	   \
					    0);				   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[1],	   \
					    1);				   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[2],	   \
					    2);				   \
  __o = __builtin_aarch64_ld3_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0);	   \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1);	   \
  __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2);	   \
  return __b;								   \
}

__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, v2di, di,
		 u64, int64x2_t)

#undef __LD3_LANE_FUNC
/* vld3q_lane (Q-register forms): pack the three input Q registers into
   a CI tuple (common v4si setter, hence the casts), perform the lane
   load, and extract the three updated registers.  */

#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{									   \
  __builtin_aarch64_simd_ci __o;					   \
  intype ret;								   \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
  __o = __builtin_aarch64_ld3_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0);	   \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1);	   \
  ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2);	   \
  return ret;								   \
}

__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD3_LANE_FUNC
/* vld4_lane (D-register forms).

   Same scheme as vld2_lane/vld3_lane above, extended to four registers:
   widen each 64-bit input to a Q register with a zeroed high half, pack
   the four into an XI tuple, run the Q-register lane-load builtin, then
   extract the low 64 bits of each result register.  */

#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
			mode, ptrmode, funcsuffix, signedtype)		   \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
{									   \
  __builtin_aarch64_simd_xi __o;					   \
  largetype __temp;							   \
  __temp.val[0] =							   \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
  __temp.val[1] =							   \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
  __temp.val[2] =							   \
    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
  __temp.val[3] =							   \
    vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0));	   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[0],	   \
					    0);				   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[1],	   \
					    1);				   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[2],	   \
					    2);				   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[3],	   \
					    3);				   \
  __o = __builtin_aarch64_ld4_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0);	   \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1);	   \
  __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2);	   \
  __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3);	   \
  return __b;								   \
}

/* Instantiations of the D-register vld4_lane form defined above.  (The
   Q-register vld4q_lane variants follow in their own section.)  */

__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, v2di, di,
		 u64, int64x2_t)

#undef __LD4_LANE_FUNC
/* vld4q_lane (Q-register forms): pack the four input Q registers into
   an XI tuple (common v4si setter, hence the casts), perform the lane
   load, and extract the four updated registers.  */

#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{									   \
  __builtin_aarch64_simd_xi __o;					   \
  intype ret;								   \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \
  __o = __builtin_aarch64_ld4_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0);	   \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1);	   \
  ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2);	   \
  ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3);	   \
  return ret;								   \
}

__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD4_LANE_FUNC
/* vmax: element-wise maximum of two D-register vectors.  The f32 form
   maps to the smax_nan builtin (contrast vmaxnm below, which maps to
   plain smax); unsigned forms cast through the signed vector types the
   umax builtins are declared with.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmax_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smax_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmax_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_smaxv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmax_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_smaxv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmax_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_smaxv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmax_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmax_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmax_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}
/* vmaxq: Q-register (128-bit) variants of vmax; same builtin mapping as
   the D-register forms above.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smax_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smax_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmaxq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_smaxv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmaxq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_smaxv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmaxq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_smaxv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}
/* vmaxnm: "number" maximum — maps to the plain smax builtins, as
   opposed to the smax_nan builtins used by vmax above.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smaxv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smaxv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smaxv2df (__a, __b);
}
/* vmaxv / vmaxvq: across-lanes maximum reduction.  Each reduc builtin
   yields a vector whose lane 0 holds the reduction result; that lane is
   extracted with vget_lane/vgetq_lane.  Unsigned forms cast through the
   signed types the umax reduction builtins are declared with.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
			0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t __a)
{
  return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t __a)
{
  return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t __a)
{
  return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxv_u8 (uint8x8_t __a)
{
  return vget_lane_u8 ((uint8x8_t)
		       __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
		       0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxv_u16 (uint16x4_t __a)
{
  return vget_lane_u16 ((uint16x4_t)
			__builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
			0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxv_u32 (uint32x2_t __a)
{
  return vget_lane_u32 ((uint32x2_t)
			__builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
			0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
			 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
			 0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t __a)
{
  return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t __a)
{
  return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t __a)
{
  return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxvq_u8 (uint8x16_t __a)
{
  return vgetq_lane_u8 ((uint8x16_t)
			__builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
			0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxvq_u16 (uint16x8_t __a)
{
  return vgetq_lane_u16 ((uint16x8_t)
			 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
			 0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxvq_u32 (uint32x4_t __a)
{
  return vgetq_lane_u32 ((uint32x4_t)
			 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
			 0);
}
/* vmaxnmv: across-lanes "number" maximum — uses the plain reduc_smax
   builtins (vs. reduc_smax_nan in vmaxv above); result is in lane 0.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
			0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
}
/* vmin: element-wise minimum of two D-register vectors — mirror image
   of vmax above (smin_nan for f32, smin/umin for integers).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmin_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smin_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sminv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmin_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sminv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmin_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sminv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmin_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmin_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmin_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}
/* vminq: Q-register (128-bit) variants of vmin; same builtin mapping as
   the D-register forms above.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smin_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smin_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vminq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sminv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vminq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sminv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vminq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sminv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vminq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vminq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vminq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}
18832 /* vminnm */
18834 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18835 vminnm_f32 (float32x2_t __a, float32x2_t __b)
18837 return __builtin_aarch64_sminv2sf (__a, __b);
18840 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18841 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
18843 return __builtin_aarch64_sminv4sf (__a, __b);
18846 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18847 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
18849 return __builtin_aarch64_sminv2df (__a, __b);
18852 /* vminv */
18854 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18855 vminv_f32 (float32x2_t __a)
18857 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
18861 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18862 vminv_s8 (int8x8_t __a)
18864 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
18868 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18869 vminv_s16 (int16x4_t __a)
18871 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
18874 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18875 vminv_s32 (int32x2_t __a)
18877 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
18880 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18881 vminv_u8 (uint8x8_t __a)
18883 return vget_lane_u8 ((uint8x8_t)
18884 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
18888 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18889 vminv_u16 (uint16x4_t __a)
18891 return vget_lane_u16 ((uint16x4_t)
18892 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
18896 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18897 vminv_u32 (uint32x2_t __a)
18899 return vget_lane_u32 ((uint32x2_t)
18900 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
18904 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18905 vminvq_f32 (float32x4_t __a)
18907 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
18911 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18912 vminvq_f64 (float64x2_t __a)
18914 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
18918 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18919 vminvq_s8 (int8x16_t __a)
18921 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
18924 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18925 vminvq_s16 (int16x8_t __a)
18927 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
18930 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18931 vminvq_s32 (int32x4_t __a)
18933 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
18936 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18937 vminvq_u8 (uint8x16_t __a)
18939 return vgetq_lane_u8 ((uint8x16_t)
18940 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
18944 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18945 vminvq_u16 (uint16x8_t __a)
18947 return vgetq_lane_u16 ((uint16x8_t)
18948 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
18952 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18953 vminvq_u32 (uint32x4_t __a)
18955 return vgetq_lane_u32 ((uint32x4_t)
18956 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
18960 /* vminnmv */
18962 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18963 vminnmv_f32 (float32x2_t __a)
18965 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
18968 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18969 vminnmvq_f32 (float32x4_t __a)
18971 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
18974 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18975 vminnmvq_f64 (float64x2_t __a)
18977 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
18980 /* vmla */
18982 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18983 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18985 return a + b * c;
18988 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18989 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18991 return __a + __b * __c;
18994 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18995 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18997 return a + b * c;
19000 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19001 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
19003 return a + b * c;
19006 /* vmla_lane */
19008 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19009 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
19010 float32x2_t __c, const int __lane)
19012 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
19015 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19016 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
19017 int16x4_t __c, const int __lane)
19019 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
19022 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19023 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
19024 int32x2_t __c, const int __lane)
19026 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
19029 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19030 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
19031 uint16x4_t __c, const int __lane)
19033 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
19036 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19037 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
19038 uint32x2_t __c, const int __lane)
19040 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
19043 /* vmla_laneq */
19045 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19046 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
19047 float32x4_t __c, const int __lane)
19049 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
19052 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19053 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
19054 int16x8_t __c, const int __lane)
19056 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
19059 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19060 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
19061 int32x4_t __c, const int __lane)
19063 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
19066 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19067 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
19068 uint16x8_t __c, const int __lane)
19070 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
19073 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19074 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
19075 uint32x4_t __c, const int __lane)
19077 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
19080 /* vmlaq_lane */
19082 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19083 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
19084 float32x2_t __c, const int __lane)
19086 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
19089 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19090 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
19091 int16x4_t __c, const int __lane)
19093 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
19096 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19097 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
19098 int32x2_t __c, const int __lane)
19100 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
19103 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19104 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
19105 uint16x4_t __c, const int __lane)
19107 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
19110 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19111 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
19112 uint32x2_t __c, const int __lane)
19114 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
19117 /* vmlaq_laneq */
19119 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19120 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
19121 float32x4_t __c, const int __lane)
19123 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
19126 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19127 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
19128 int16x8_t __c, const int __lane)
19130 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
19133 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19134 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
19135 int32x4_t __c, const int __lane)
19137 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
19140 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19141 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
19142 uint16x8_t __c, const int __lane)
19144 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
19147 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19148 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
19149 uint32x4_t __c, const int __lane)
19151 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
19154 /* vmls */
19156 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19157 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
19159 return a - b * c;
19162 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19163 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
19165 return __a - __b * __c;
19168 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19169 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
19171 return a - b * c;
19174 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19175 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
19177 return a - b * c;
19180 /* vmls_lane */
19182 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19183 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
19184 float32x2_t __c, const int __lane)
19186 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
19189 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19190 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
19191 int16x4_t __c, const int __lane)
19193 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
19196 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19197 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
19198 int32x2_t __c, const int __lane)
19200 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
19203 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19204 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
19205 uint16x4_t __c, const int __lane)
19207 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
19210 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19211 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
19212 uint32x2_t __c, const int __lane)
19214 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
19217 /* vmls_laneq */
19219 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19220 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
19221 float32x4_t __c, const int __lane)
19223 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
19226 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19227 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
19228 int16x8_t __c, const int __lane)
19230 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
19233 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19234 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
19235 int32x4_t __c, const int __lane)
19237 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
19240 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19241 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
19242 uint16x8_t __c, const int __lane)
19244 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
19247 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19248 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
19249 uint32x4_t __c, const int __lane)
19251 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
19254 /* vmlsq_lane */
19256 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19257 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
19258 float32x2_t __c, const int __lane)
19260 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
19263 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19264 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
19265 int16x4_t __c, const int __lane)
19267 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
19270 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19271 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
19272 int32x2_t __c, const int __lane)
19274 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
19277 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19278 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
19279 uint16x4_t __c, const int __lane)
19281 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
19284 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19285 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
19286 uint32x2_t __c, const int __lane)
19288 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
19291 /* vmlsq_laneq */
19293 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19294 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
19295 float32x4_t __c, const int __lane)
19297 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
19300 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19301 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
19302 int16x8_t __c, const int __lane)
19304 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
19307 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19308 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
19309 int32x4_t __c, const int __lane)
19311 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
19313 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19314 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
19315 uint16x8_t __c, const int __lane)
19317 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
19320 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19321 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
19322 uint32x4_t __c, const int __lane)
19324 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
19327 /* vmov_n_ */
19329 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19330 vmov_n_f32 (float32_t __a)
19332 return vdup_n_f32 (__a);
19335 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19336 vmov_n_f64 (float64_t __a)
19338 return (float64x1_t) {__a};
19341 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19342 vmov_n_p8 (poly8_t __a)
19344 return vdup_n_p8 (__a);
19347 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
19348 vmov_n_p16 (poly16_t __a)
19350 return vdup_n_p16 (__a);
19353 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19354 vmov_n_s8 (int8_t __a)
19356 return vdup_n_s8 (__a);
19359 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19360 vmov_n_s16 (int16_t __a)
19362 return vdup_n_s16 (__a);
19365 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19366 vmov_n_s32 (int32_t __a)
19368 return vdup_n_s32 (__a);
19371 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19372 vmov_n_s64 (int64_t __a)
19374 return (int64x1_t) {__a};
19377 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19378 vmov_n_u8 (uint8_t __a)
19380 return vdup_n_u8 (__a);
19383 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19384 vmov_n_u16 (uint16_t __a)
19386 return vdup_n_u16 (__a);
19389 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19390 vmov_n_u32 (uint32_t __a)
19392 return vdup_n_u32 (__a);
19395 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19396 vmov_n_u64 (uint64_t __a)
19398 return (uint64x1_t) {__a};
19401 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19402 vmovq_n_f32 (float32_t __a)
19404 return vdupq_n_f32 (__a);
19407 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19408 vmovq_n_f64 (float64_t __a)
19410 return vdupq_n_f64 (__a);
19413 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19414 vmovq_n_p8 (poly8_t __a)
19416 return vdupq_n_p8 (__a);
19419 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
19420 vmovq_n_p16 (poly16_t __a)
19422 return vdupq_n_p16 (__a);
19425 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19426 vmovq_n_s8 (int8_t __a)
19428 return vdupq_n_s8 (__a);
19431 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19432 vmovq_n_s16 (int16_t __a)
19434 return vdupq_n_s16 (__a);
19437 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19438 vmovq_n_s32 (int32_t __a)
19440 return vdupq_n_s32 (__a);
19443 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19444 vmovq_n_s64 (int64_t __a)
19446 return vdupq_n_s64 (__a);
19449 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19450 vmovq_n_u8 (uint8_t __a)
19452 return vdupq_n_u8 (__a);
19455 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19456 vmovq_n_u16 (uint16_t __a)
19458 return vdupq_n_u16 (__a);
19461 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19462 vmovq_n_u32 (uint32_t __a)
19464 return vdupq_n_u32 (__a);
19467 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19468 vmovq_n_u64 (uint64_t __a)
19470 return vdupq_n_u64 (__a);
19473 /* vmul_lane */
19475 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19476 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
19478 return __a * __aarch64_vget_lane_f32 (__b, __lane);
19481 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19482 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
19484 return __a * __b;
19487 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19488 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
19490 return __a * __aarch64_vget_lane_s16 (__b, __lane);
19493 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19494 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
19496 return __a * __aarch64_vget_lane_s32 (__b, __lane);
19499 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19500 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
19502 return __a * __aarch64_vget_lane_u16 (__b, __lane);
19505 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19506 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
19508 return __a * __aarch64_vget_lane_u32 (__b, __lane);
19511 /* vmuld_lane */
19513 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19514 vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
19516 return __a * __aarch64_vget_lane_f64 (__b, __lane);
19519 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19520 vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
19522 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
19525 /* vmuls_lane */
19527 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19528 vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
19530 return __a * __aarch64_vget_lane_f32 (__b, __lane);
19533 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19534 vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
19536 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
19539 /* vmul_laneq */
19541 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19542 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
19544 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
19547 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19548 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
19550 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
19553 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19554 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
19556 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
19559 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19560 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
19562 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
19565 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19566 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
19568 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
19571 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19572 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
19574 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
19577 /* vmul_n */
19579 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19580 vmul_n_f64 (float64x1_t __a, float64_t __b)
19582 return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
19585 /* vmulq_lane */
19587 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19588 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
19590 return __a * __aarch64_vget_lane_f32 (__b, __lane);
19593 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19594 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
19596 __builtin_aarch64_im_lane_boundsi (__lane, 1);
19597 return __a * __b[0];
19600 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19601 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
19603 return __a * __aarch64_vget_lane_s16 (__b, __lane);
19606 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19607 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
19609 return __a * __aarch64_vget_lane_s32 (__b, __lane);
19612 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19613 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
19615 return __a * __aarch64_vget_lane_u16 (__b, __lane);
19618 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19619 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
19621 return __a * __aarch64_vget_lane_u32 (__b, __lane);
19624 /* vmulq_laneq */
19626 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19627 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
19629 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
19632 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19633 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
19635 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
19638 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19639 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
19641 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
19644 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19645 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
19647 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
19650 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19651 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
19653 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
19656 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19657 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
19659 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
19662 /* vneg */
19664 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19665 vneg_f32 (float32x2_t __a)
19667 return -__a;
19670 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19671 vneg_f64 (float64x1_t __a)
19673 return -__a;
19676 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19677 vneg_s8 (int8x8_t __a)
19679 return -__a;
19682 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19683 vneg_s16 (int16x4_t __a)
19685 return -__a;
19688 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19689 vneg_s32 (int32x2_t __a)
19691 return -__a;
19694 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19695 vneg_s64 (int64x1_t __a)
19697 return -__a;
19700 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19701 vnegq_f32 (float32x4_t __a)
19703 return -__a;
19706 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19707 vnegq_f64 (float64x2_t __a)
19709 return -__a;
19712 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19713 vnegq_s8 (int8x16_t __a)
19715 return -__a;
19718 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19719 vnegq_s16 (int16x8_t __a)
19721 return -__a;
19724 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19725 vnegq_s32 (int32x4_t __a)
19727 return -__a;
19730 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19731 vnegq_s64 (int64x2_t __a)
19733 return -__a;
19736 /* vpadd */
19738 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19739 vpadd_s8 (int8x8_t __a, int8x8_t __b)
19741 return __builtin_aarch64_addpv8qi (__a, __b);
19744 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19745 vpadd_s16 (int16x4_t __a, int16x4_t __b)
19747 return __builtin_aarch64_addpv4hi (__a, __b);
19750 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19751 vpadd_s32 (int32x2_t __a, int32x2_t __b)
19753 return __builtin_aarch64_addpv2si (__a, __b);
19756 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19757 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
19759 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
19760 (int8x8_t) __b);
19763 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19764 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
19766 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
19767 (int16x4_t) __b);
19770 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19771 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
19773 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
19774 (int32x2_t) __b);
19777 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19778 vpaddd_f64 (float64x2_t __a)
19780 return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0);
19783 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19784 vpaddd_s64 (int64x2_t __a)
19786 return __builtin_aarch64_addpdi (__a);
19789 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19790 vpaddd_u64 (uint64x2_t __a)
19792 return __builtin_aarch64_addpdi ((int64x2_t) __a);
19795 /* vqabs */
19797 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19798 vqabsq_s64 (int64x2_t __a)
19800 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
19803 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19804 vqabsb_s8 (int8_t __a)
19806 return (int8_t) __builtin_aarch64_sqabsqi (__a);
19809 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19810 vqabsh_s16 (int16_t __a)
19812 return (int16_t) __builtin_aarch64_sqabshi (__a);
19815 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19816 vqabss_s32 (int32_t __a)
19818 return (int32_t) __builtin_aarch64_sqabssi (__a);
19821 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19822 vqabsd_s64 (int64_t __a)
19824 return __builtin_aarch64_sqabsdi (__a);
19827 /* vqadd */
19829 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19830 vqaddb_s8 (int8_t __a, int8_t __b)
19832 return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
19835 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19836 vqaddh_s16 (int16_t __a, int16_t __b)
19838 return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
19841 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19842 vqadds_s32 (int32_t __a, int32_t __b)
19844 return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
19847 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19848 vqaddd_s64 (int64_t __a, int64_t __b)
19850 return __builtin_aarch64_sqadddi (__a, __b);
19853 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19854 vqaddb_u8 (uint8_t __a, uint8_t __b)
19856 return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
19859 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19860 vqaddh_u16 (uint16_t __a, uint16_t __b)
19862 return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
19865 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19866 vqadds_u32 (uint32_t __a, uint32_t __b)
19868 return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
19871 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19872 vqaddd_u64 (uint64_t __a, uint64_t __b)
19874 return __builtin_aarch64_uqadddi_uuu (__a, __b);
/* vqdmlal */

/* Signed saturating doubling multiply-accumulate long (SQDMLAL).
   Naming scheme shared by the whole family:
     _high   - multiply the upper halves of the 128-bit operands;
     _lane   - multiply by the element of a 64-bit vector selected by the
               trailing constant index;
     _laneq  - same, but the index selects from a 128-bit vector;
     _n      - multiply every element by a single scalar;
     h/s     - fully scalar forms (16-bit and 32-bit sources).
   Every intrinsic is a thin wrapper over exactly one target builtin.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
}
/* vqdmlsl */

/* Signed saturating doubling multiply-subtract long (SQDMLSL): exact mirror
   of the vqdmlal family above, with the doubled product subtracted from the
   accumulator instead of added.  Same _high/_lane/_laneq/_n/scalar naming
   scheme; each intrinsic wraps exactly one target builtin.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
}
/* vqdmulh */

/* Signed saturating doubling multiply returning high half (SQDMULH).
   _lane/_laneq variants multiply by the vector element selected by the
   trailing constant index; the h/s forms operate on scalars.  Each wraps
   a single target builtin.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
}
/* vqdmull */

/* Signed saturating doubling multiply long (SQDMULL), widening 16->32 and
   32->64 bit.  Same _high/_lane/_laneq/_n/scalar naming scheme as the
   vqdmlal family; each intrinsic wraps exactly one target builtin.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16_t __a, int16_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmullsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c);
}
/* vqmovn */

/* Saturating narrow (SQXTN/UQXTN), halving the element width.  The unsigned
   vector builtins here are declared with signed vector operand types, hence
   the explicit casts on the argument and result of the _u* vector forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16_t __a)
{
  return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32_t __a)
{
  return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64_t __a)
{
  return (uint32_t) __builtin_aarch64_uqmovndi (__a);
}
/* vqmovun */

/* Signed-to-unsigned saturating narrow (SQXTUN): signed input, unsigned
   narrower result.
   NOTE(review): the scalar variants below are declared to return the signed
   int8_t/int16_t/int32_t types, while ACLE specifies unsigned result types
   for vqmovunh/vqmovuns/vqmovund — confirm against the ACLE spec; changing
   the prototypes would be a user-visible interface change.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovundi (__a);
}
/* vqneg */

/* Saturating negation (SQNEG); the result saturates instead of wrapping on
   the most-negative input.  Thin wrappers over the sqneg builtins.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqnegsi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqnegd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqnegdi (__a);
}
/* vqrdmulh */

/* Signed saturating rounding doubling multiply returning high half
   (SQRDMULH): rounding counterpart of vqdmulh, same _lane/_laneq/scalar
   layout; each wraps a single target builtin.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
}
/* vqrshl */

/* Saturating rounding shift left (SQRSHL/UQRSHL); the shift count is a
   signed per-element value.  The 64x1 variants unwrap the single element
   with [0] for the DI-mode scalar builtin and rewrap the result in a
   compound literal, since there is no v1di builtin.  The _uus-suffixed
   builtins carry unsigned-result/unsigned-first/signed-second typing in
   their prototypes, so the unsigned variants need no casts.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8_t __a, uint8_t __b)
{
  return __builtin_aarch64_uqrshlqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16_t __a, uint16_t __b)
{
  return __builtin_aarch64_uqrshlhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32_t __a, uint32_t __b)
{
  return __builtin_aarch64_uqrshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqrshldi_uus (__a, __b);
}
/* vqrshrn */

/* Saturating rounding shift right narrow by an immediate
   (SQRSHRN/UQRSHRN).  __b is the shift amount and must be an integer
   constant expression for the builtin to expand.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
}
/* vqrshrun */

/* Signed-to-unsigned saturating rounding shift right narrow by an immediate
   (SQRSHRUN); signed input, unsigned narrower result.
   NOTE(review): as with vqmovun, the scalar variants below return signed
   int8_t/int16_t/int32_t where ACLE specifies unsigned result types —
   confirm against the ACLE spec before changing the interface.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}
20813 /* vqshl */
20815 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20816 vqshl_s8 (int8x8_t __a, int8x8_t __b)
20818 return __builtin_aarch64_sqshlv8qi (__a, __b);
20821 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20822 vqshl_s16 (int16x4_t __a, int16x4_t __b)
20824 return __builtin_aarch64_sqshlv4hi (__a, __b);
20827 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20828 vqshl_s32 (int32x2_t __a, int32x2_t __b)
20830 return __builtin_aarch64_sqshlv2si (__a, __b);
20833 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20834 vqshl_s64 (int64x1_t __a, int64x1_t __b)
20836 return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
20839 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20840 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
20842 return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
20845 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20846 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
20848 return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
20851 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20852 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
20854 return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
20857 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20858 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
20860 return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
20863 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20864 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
20866 return __builtin_aarch64_sqshlv16qi (__a, __b);
20869 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20870 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
20872 return __builtin_aarch64_sqshlv8hi (__a, __b);
20875 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20876 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
20878 return __builtin_aarch64_sqshlv4si (__a, __b);
20881 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20882 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
20884 return __builtin_aarch64_sqshlv2di (__a, __b);
20887 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20888 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
20890 return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
20893 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20894 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
20896 return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
20899 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20900 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
20902 return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
20905 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20906 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
20908 return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
/* Scalar forms of vqshl, mapping directly onto the qi/hi/si/di
   builtins.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_u8 (uint8_t __a, uint8_t __b)
{
  return __builtin_aarch64_uqshlqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_u16 (uint16_t __a, uint16_t __b)
{
  return __builtin_aarch64_uqshlhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_u32 (uint32_t __a, uint32_t __b)
{
  return __builtin_aarch64_uqshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqshldi_uus (__a, __b);
}
20959 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20960 vqshl_n_s8 (int8x8_t __a, const int __b)
20962 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
20965 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20966 vqshl_n_s16 (int16x4_t __a, const int __b)
20968 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
20971 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20972 vqshl_n_s32 (int32x2_t __a, const int __b)
20974 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
20977 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20978 vqshl_n_s64 (int64x1_t __a, const int __b)
20980 return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
20983 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20984 vqshl_n_u8 (uint8x8_t __a, const int __b)
20986 return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
20989 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20990 vqshl_n_u16 (uint16x4_t __a, const int __b)
20992 return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
20995 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20996 vqshl_n_u32 (uint32x2_t __a, const int __b)
20998 return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
21001 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21002 vqshl_n_u64 (uint64x1_t __a, const int __b)
21004 return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
21007 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21008 vqshlq_n_s8 (int8x16_t __a, const int __b)
21010 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
21013 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21014 vqshlq_n_s16 (int16x8_t __a, const int __b)
21016 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
21019 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21020 vqshlq_n_s32 (int32x4_t __a, const int __b)
21022 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
21025 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21026 vqshlq_n_s64 (int64x2_t __a, const int __b)
21028 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
21031 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21032 vqshlq_n_u8 (uint8x16_t __a, const int __b)
21034 return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
21037 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21038 vqshlq_n_u16 (uint16x8_t __a, const int __b)
21040 return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
21043 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21044 vqshlq_n_u32 (uint32x4_t __a, const int __b)
21046 return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
21049 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21050 vqshlq_n_u64 (uint64x2_t __a, const int __b)
21052 return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
/* Immediate-count scalar forms of vqshl.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
}
21103 /* vqshlu */
21105 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21106 vqshlu_n_s8 (int8x8_t __a, const int __b)
21108 return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
21111 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21112 vqshlu_n_s16 (int16x4_t __a, const int __b)
21114 return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
21117 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21118 vqshlu_n_s32 (int32x2_t __a, const int __b)
21120 return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
21123 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21124 vqshlu_n_s64 (int64x1_t __a, const int __b)
21126 return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
21129 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21130 vqshluq_n_s8 (int8x16_t __a, const int __b)
21132 return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
21135 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21136 vqshluq_n_s16 (int16x8_t __a, const int __b)
21138 return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
21141 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21142 vqshluq_n_s32 (int32x4_t __a, const int __b)
21144 return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
21147 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21148 vqshluq_n_s64 (int64x2_t __a, const int __b)
21150 return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
/* Scalar forms of vqshlu (saturating shift left, unsigned result).

   The _uss builtins already produce an unsigned result from a signed
   input, and the ACLE specification gives these intrinsics unsigned
   return types.  The b/h/s variants previously returned signed types,
   which made saturated results in the upper half of the unsigned
   range come back negative; they now return uint8_t / uint16_t /
   uint32_t.  The d variant already returned uint64_t.  */

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlub_n_s8 (int8_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshluh_n_s16 (int16_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshlus_n_s32 (int32_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshlud_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
}
21177 /* vqshrn */
21179 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21180 vqshrn_n_s16 (int16x8_t __a, const int __b)
21182 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
21185 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21186 vqshrn_n_s32 (int32x4_t __a, const int __b)
21188 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
21191 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21192 vqshrn_n_s64 (int64x2_t __a, const int __b)
21194 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
21197 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21198 vqshrn_n_u16 (uint16x8_t __a, const int __b)
21200 return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
21203 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21204 vqshrn_n_u32 (uint32x4_t __a, const int __b)
21206 return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
21209 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21210 vqshrn_n_u64 (uint64x2_t __a, const int __b)
21212 return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
21215 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21216 vqshrnh_n_s16 (int16_t __a, const int __b)
21218 return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
21221 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21222 vqshrns_n_s32 (int32_t __a, const int __b)
21224 return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
21227 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21228 vqshrnd_n_s64 (int64_t __a, const int __b)
21230 return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
21233 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21234 vqshrnh_n_u16 (uint16_t __a, const int __b)
21236 return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
21239 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21240 vqshrns_n_u32 (uint32_t __a, const int __b)
21242 return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
21245 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21246 vqshrnd_n_u64 (uint64_t __a, const int __b)
21248 return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
21251 /* vqshrun */
21253 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21254 vqshrun_n_s16 (int16x8_t __a, const int __b)
21256 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
21259 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21260 vqshrun_n_s32 (int32x4_t __a, const int __b)
21262 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
21265 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21266 vqshrun_n_s64 (int64x2_t __a, const int __b)
21268 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
/* Scalar vqshrun (saturating shift right unsigned narrow).

   As with the vector forms above (which correctly return unsigned
   vectors), the ACLE specification gives these scalar intrinsics
   unsigned narrow return types; the previous int8_t/int16_t/int32_t
   returns were a signedness bug.  */

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshrunh_n_s16 (int16_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshruns_n_s32 (int32_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}
/* vqsub: scalar saturating subtract.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}
21339 /* vrbit */
21341 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21342 vrbit_p8 (poly8x8_t __a)
21344 return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
21347 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21348 vrbit_s8 (int8x8_t __a)
21350 return __builtin_aarch64_rbitv8qi (__a);
21353 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21354 vrbit_u8 (uint8x8_t __a)
21356 return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
21359 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21360 vrbitq_p8 (poly8x16_t __a)
21362 return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
21365 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21366 vrbitq_s8 (int8x16_t __a)
21368 return __builtin_aarch64_rbitv16qi (__a);
21371 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21372 vrbitq_u8 (uint8x16_t __a)
21374 return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
21377 /* vrecpe */
21379 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21380 vrecpes_f32 (float32_t __a)
21382 return __builtin_aarch64_frecpesf (__a);
21385 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21386 vrecped_f64 (float64_t __a)
21388 return __builtin_aarch64_frecpedf (__a);
21391 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21392 vrecpe_f32 (float32x2_t __a)
21394 return __builtin_aarch64_frecpev2sf (__a);
21397 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21398 vrecpeq_f32 (float32x4_t __a)
21400 return __builtin_aarch64_frecpev4sf (__a);
21403 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21404 vrecpeq_f64 (float64x2_t __a)
21406 return __builtin_aarch64_frecpev2df (__a);
21409 /* vrecps */
21411 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21412 vrecpss_f32 (float32_t __a, float32_t __b)
21414 return __builtin_aarch64_frecpssf (__a, __b);
21417 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21418 vrecpsd_f64 (float64_t __a, float64_t __b)
21420 return __builtin_aarch64_frecpsdf (__a, __b);
21423 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21424 vrecps_f32 (float32x2_t __a, float32x2_t __b)
21426 return __builtin_aarch64_frecpsv2sf (__a, __b);
21429 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21430 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
21432 return __builtin_aarch64_frecpsv4sf (__a, __b);
21435 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21436 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
21438 return __builtin_aarch64_frecpsv2df (__a, __b);
21441 /* vrecpx */
21443 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21444 vrecpxs_f32 (float32_t __a)
21446 return __builtin_aarch64_frecpxsf (__a);
21449 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21450 vrecpxd_f64 (float64_t __a)
21452 return __builtin_aarch64_frecpxdf (__a);
21456 /* vrev */
21458 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21459 vrev16_p8 (poly8x8_t a)
21461 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21464 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21465 vrev16_s8 (int8x8_t a)
21467 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21470 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21471 vrev16_u8 (uint8x8_t a)
21473 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21476 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21477 vrev16q_p8 (poly8x16_t a)
21479 return __builtin_shuffle (a,
21480 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21483 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21484 vrev16q_s8 (int8x16_t a)
21486 return __builtin_shuffle (a,
21487 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21490 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21491 vrev16q_u8 (uint8x16_t a)
21493 return __builtin_shuffle (a,
21494 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21497 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21498 vrev32_p8 (poly8x8_t a)
21500 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21503 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
21504 vrev32_p16 (poly16x4_t a)
21506 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21509 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21510 vrev32_s8 (int8x8_t a)
21512 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21515 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21516 vrev32_s16 (int16x4_t a)
21518 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21521 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21522 vrev32_u8 (uint8x8_t a)
21524 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21527 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21528 vrev32_u16 (uint16x4_t a)
21530 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21533 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21534 vrev32q_p8 (poly8x16_t a)
21536 return __builtin_shuffle (a,
21537 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21540 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21541 vrev32q_p16 (poly16x8_t a)
21543 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21546 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21547 vrev32q_s8 (int8x16_t a)
21549 return __builtin_shuffle (a,
21550 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21553 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21554 vrev32q_s16 (int16x8_t a)
21556 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21559 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21560 vrev32q_u8 (uint8x16_t a)
21562 return __builtin_shuffle (a,
21563 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21566 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21567 vrev32q_u16 (uint16x8_t a)
21569 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21572 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21573 vrev64_f32 (float32x2_t a)
21575 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21578 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21579 vrev64_p8 (poly8x8_t a)
21581 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21584 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
21585 vrev64_p16 (poly16x4_t a)
21587 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21590 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21591 vrev64_s8 (int8x8_t a)
21593 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21596 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21597 vrev64_s16 (int16x4_t a)
21599 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21602 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21603 vrev64_s32 (int32x2_t a)
21605 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21608 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21609 vrev64_u8 (uint8x8_t a)
21611 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21614 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21615 vrev64_u16 (uint16x4_t a)
21617 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21620 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21621 vrev64_u32 (uint32x2_t a)
21623 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21626 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21627 vrev64q_f32 (float32x4_t a)
21629 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21632 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21633 vrev64q_p8 (poly8x16_t a)
21635 return __builtin_shuffle (a,
21636 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21639 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21640 vrev64q_p16 (poly16x8_t a)
21642 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21645 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21646 vrev64q_s8 (int8x16_t a)
21648 return __builtin_shuffle (a,
21649 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21652 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21653 vrev64q_s16 (int16x8_t a)
21655 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21658 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21659 vrev64q_s32 (int32x4_t a)
21661 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21664 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21665 vrev64q_u8 (uint8x16_t a)
21667 return __builtin_shuffle (a,
21668 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21671 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21672 vrev64q_u16 (uint16x8_t a)
21674 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21677 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21678 vrev64q_u32 (uint32x4_t a)
21680 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21683 /* vrnd */
21685 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21686 vrnd_f32 (float32x2_t __a)
21688 return __builtin_aarch64_btruncv2sf (__a);
21691 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21692 vrnd_f64 (float64x1_t __a)
21694 return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
21697 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21698 vrndq_f32 (float32x4_t __a)
21700 return __builtin_aarch64_btruncv4sf (__a);
21703 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21704 vrndq_f64 (float64x2_t __a)
21706 return __builtin_aarch64_btruncv2df (__a);
21709 /* vrnda */
21711 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21712 vrnda_f32 (float32x2_t __a)
21714 return __builtin_aarch64_roundv2sf (__a);
21717 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21718 vrnda_f64 (float64x1_t __a)
21720 return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
21723 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21724 vrndaq_f32 (float32x4_t __a)
21726 return __builtin_aarch64_roundv4sf (__a);
21729 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21730 vrndaq_f64 (float64x2_t __a)
21732 return __builtin_aarch64_roundv2df (__a);
21735 /* vrndi */
21737 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21738 vrndi_f32 (float32x2_t __a)
21740 return __builtin_aarch64_nearbyintv2sf (__a);
21743 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21744 vrndi_f64 (float64x1_t __a)
21746 return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
21749 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21750 vrndiq_f32 (float32x4_t __a)
21752 return __builtin_aarch64_nearbyintv4sf (__a);
21755 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21756 vrndiq_f64 (float64x2_t __a)
21758 return __builtin_aarch64_nearbyintv2df (__a);
21761 /* vrndm */
21763 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21764 vrndm_f32 (float32x2_t __a)
21766 return __builtin_aarch64_floorv2sf (__a);
21769 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21770 vrndm_f64 (float64x1_t __a)
21772 return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
21775 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21776 vrndmq_f32 (float32x4_t __a)
21778 return __builtin_aarch64_floorv4sf (__a);
21781 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21782 vrndmq_f64 (float64x2_t __a)
21784 return __builtin_aarch64_floorv2df (__a);
21787 /* vrndn */
21789 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21790 vrndn_f32 (float32x2_t __a)
21792 return __builtin_aarch64_frintnv2sf (__a);
21795 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21796 vrndn_f64 (float64x1_t __a)
21798 return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
21801 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21802 vrndnq_f32 (float32x4_t __a)
21804 return __builtin_aarch64_frintnv4sf (__a);
21807 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21808 vrndnq_f64 (float64x2_t __a)
21810 return __builtin_aarch64_frintnv2df (__a);
21813 /* vrndp */
21815 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21816 vrndp_f32 (float32x2_t __a)
21818 return __builtin_aarch64_ceilv2sf (__a);
21821 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21822 vrndp_f64 (float64x1_t __a)
21824 return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
21827 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21828 vrndpq_f32 (float32x4_t __a)
21830 return __builtin_aarch64_ceilv4sf (__a);
21833 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21834 vrndpq_f64 (float64x2_t __a)
21836 return __builtin_aarch64_ceilv2df (__a);
21839 /* vrndx */
21841 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21842 vrndx_f32 (float32x2_t __a)
21844 return __builtin_aarch64_rintv2sf (__a);
21847 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21848 vrndx_f64 (float64x1_t __a)
21850 return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
21853 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21854 vrndxq_f32 (float32x4_t __a)
21856 return __builtin_aarch64_rintv4sf (__a);
21859 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21860 vrndxq_f64 (float64x2_t __a)
21862 return __builtin_aarch64_rintv2df (__a);
21865 /* vrshl */
21867 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21868 vrshl_s8 (int8x8_t __a, int8x8_t __b)
21870 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
21873 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21874 vrshl_s16 (int16x4_t __a, int16x4_t __b)
21876 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
21879 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21880 vrshl_s32 (int32x2_t __a, int32x2_t __b)
21882 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
21885 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21886 vrshl_s64 (int64x1_t __a, int64x1_t __b)
21888 return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
21891 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21892 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
21894 return __builtin_aarch64_urshlv8qi_uus (__a, __b);
21897 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21898 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
21900 return __builtin_aarch64_urshlv4hi_uus (__a, __b);
21903 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21904 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
21906 return __builtin_aarch64_urshlv2si_uus (__a, __b);
21909 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21910 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
21912 return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
21915 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21916 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
21918 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
21921 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21922 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
21924 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
21927 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21928 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
21930 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
21933 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21934 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
21936 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
21939 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21940 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21942 return __builtin_aarch64_urshlv16qi_uus (__a, __b);
21945 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21946 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21948 return __builtin_aarch64_urshlv8hi_uus (__a, __b);
21951 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21952 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21954 return __builtin_aarch64_urshlv4si_uus (__a, __b);
21957 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21958 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21960 return __builtin_aarch64_urshlv2di_uus (__a, __b);
21963 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21964 vrshld_s64 (int64_t __a, int64_t __b)
21966 return __builtin_aarch64_srshldi (__a, __b);
21969 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21970 vrshld_u64 (uint64_t __a, int64_t __b)
21972 return __builtin_aarch64_urshldi_uus (__a, __b);
21975 /* vrshr */
21977 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21978 vrshr_n_s8 (int8x8_t __a, const int __b)
21980 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
21983 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21984 vrshr_n_s16 (int16x4_t __a, const int __b)
21986 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
21989 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21990 vrshr_n_s32 (int32x2_t __a, const int __b)
21992 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
21995 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21996 vrshr_n_s64 (int64x1_t __a, const int __b)
21998 return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
22001 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22002 vrshr_n_u8 (uint8x8_t __a, const int __b)
22004 return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
22007 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22008 vrshr_n_u16 (uint16x4_t __a, const int __b)
22010 return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
22013 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22014 vrshr_n_u32 (uint32x2_t __a, const int __b)
22016 return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
22019 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22020 vrshr_n_u64 (uint64x1_t __a, const int __b)
22022 return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
22025 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22026 vrshrq_n_s8 (int8x16_t __a, const int __b)
22028 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
22031 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22032 vrshrq_n_s16 (int16x8_t __a, const int __b)
22034 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
22037 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22038 vrshrq_n_s32 (int32x4_t __a, const int __b)
22040 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
22043 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22044 vrshrq_n_s64 (int64x2_t __a, const int __b)
22046 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
22049 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22050 vrshrq_n_u8 (uint8x16_t __a, const int __b)
22052 return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
22055 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22056 vrshrq_n_u16 (uint16x8_t __a, const int __b)
22058 return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
22061 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22062 vrshrq_n_u32 (uint32x4_t __a, const int __b)
22064 return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
22067 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22068 vrshrq_n_u64 (uint64x2_t __a, const int __b)
22070 return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
22073 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22074 vrshrd_n_s64 (int64_t __a, const int __b)
22076 return __builtin_aarch64_srshr_ndi (__a, __b);
22079 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22080 vrshrd_n_u64 (uint64_t __a, const int __b)
22082 return __builtin_aarch64_urshr_ndi_uus (__a, __b);
22085 /* vrsra */
22087 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22088 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22090 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
22093 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22094 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22096 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
22099 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22100 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22102 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
22105 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22106 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22108 return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
22111 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22112 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22114 return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
22117 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22118 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22120 return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
22123 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22124 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22126 return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
22129 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22130 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22132 return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
22135 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22136 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22138 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
22141 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22142 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22144 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
22147 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22148 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22150 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
22153 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22154 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22156 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
22159 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22160 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22162 return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
22165 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22166 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22168 return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
22171 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22172 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22174 return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
22177 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22178 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22180 return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
22183 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22184 vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
22186 return __builtin_aarch64_srsra_ndi (__a, __b, __c);
22189 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22190 vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22192 return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* SHA-1, SHA-256 and polynomial-multiply intrinsics, available only
   when the crypto extension is enabled.  Declared with the same
   __extension__/__always_inline__ style as the rest of this file for
   consistency.  */

/* vsha1 */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vmull_p64 (poly64_t a, poly64_t b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  /* Multiplies the high lanes of the two-lane polynomial vectors.  */
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
22272 /* vshl */
22274 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22275 vshl_n_s8 (int8x8_t __a, const int __b)
22277 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
22280 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22281 vshl_n_s16 (int16x4_t __a, const int __b)
22283 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
22286 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22287 vshl_n_s32 (int32x2_t __a, const int __b)
22289 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
22292 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22293 vshl_n_s64 (int64x1_t __a, const int __b)
22295 return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
22298 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22299 vshl_n_u8 (uint8x8_t __a, const int __b)
22301 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
22304 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22305 vshl_n_u16 (uint16x4_t __a, const int __b)
22307 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
22310 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22311 vshl_n_u32 (uint32x2_t __a, const int __b)
22313 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
22316 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22317 vshl_n_u64 (uint64x1_t __a, const int __b)
22319 return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
22322 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22323 vshlq_n_s8 (int8x16_t __a, const int __b)
22325 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
22328 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22329 vshlq_n_s16 (int16x8_t __a, const int __b)
22331 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
22334 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22335 vshlq_n_s32 (int32x4_t __a, const int __b)
22337 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
22340 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22341 vshlq_n_s64 (int64x2_t __a, const int __b)
22343 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
22346 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22347 vshlq_n_u8 (uint8x16_t __a, const int __b)
22349 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
22352 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22353 vshlq_n_u16 (uint16x8_t __a, const int __b)
22355 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
22358 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22359 vshlq_n_u32 (uint32x4_t __a, const int __b)
22361 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
22364 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22365 vshlq_n_u64 (uint64x2_t __a, const int __b)
22367 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
22370 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22371 vshld_n_s64 (int64_t __a, const int __b)
22373 return __builtin_aarch64_ashldi (__a, __b);
22376 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22377 vshld_n_u64 (uint64_t __a, const int __b)
22379 return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
22382 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22383 vshl_s8 (int8x8_t __a, int8x8_t __b)
22385 return __builtin_aarch64_sshlv8qi (__a, __b);
22388 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22389 vshl_s16 (int16x4_t __a, int16x4_t __b)
22391 return __builtin_aarch64_sshlv4hi (__a, __b);
22394 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22395 vshl_s32 (int32x2_t __a, int32x2_t __b)
22397 return __builtin_aarch64_sshlv2si (__a, __b);
22400 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22401 vshl_s64 (int64x1_t __a, int64x1_t __b)
22403 return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
22406 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22407 vshl_u8 (uint8x8_t __a, int8x8_t __b)
22409 return __builtin_aarch64_ushlv8qi_uus (__a, __b);
22412 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22413 vshl_u16 (uint16x4_t __a, int16x4_t __b)
22415 return __builtin_aarch64_ushlv4hi_uus (__a, __b);
22418 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22419 vshl_u32 (uint32x2_t __a, int32x2_t __b)
22421 return __builtin_aarch64_ushlv2si_uus (__a, __b);
22424 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22425 vshl_u64 (uint64x1_t __a, int64x1_t __b)
22427 return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
22430 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22431 vshlq_s8 (int8x16_t __a, int8x16_t __b)
22433 return __builtin_aarch64_sshlv16qi (__a, __b);
22436 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22437 vshlq_s16 (int16x8_t __a, int16x8_t __b)
22439 return __builtin_aarch64_sshlv8hi (__a, __b);
22442 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22443 vshlq_s32 (int32x4_t __a, int32x4_t __b)
22445 return __builtin_aarch64_sshlv4si (__a, __b);
22448 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22449 vshlq_s64 (int64x2_t __a, int64x2_t __b)
22451 return __builtin_aarch64_sshlv2di (__a, __b);
22454 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22455 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
22457 return __builtin_aarch64_ushlv16qi_uus (__a, __b);
22460 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22461 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
22463 return __builtin_aarch64_ushlv8hi_uus (__a, __b);
22466 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22467 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
22469 return __builtin_aarch64_ushlv4si_uus (__a, __b);
22472 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22473 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
22475 return __builtin_aarch64_ushlv2di_uus (__a, __b);
22478 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22479 vshld_s64 (int64_t __a, int64_t __b)
22481 return __builtin_aarch64_sshldi (__a, __b);
22484 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22485 vshld_u64 (uint64_t __a, uint64_t __b)
22487 return __builtin_aarch64_ushldi_uus (__a, __b);
22490 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22491 vshll_high_n_s8 (int8x16_t __a, const int __b)
22493 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
22496 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22497 vshll_high_n_s16 (int16x8_t __a, const int __b)
22499 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
22502 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22503 vshll_high_n_s32 (int32x4_t __a, const int __b)
22505 return __builtin_aarch64_sshll2_nv4si (__a, __b);
22508 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22509 vshll_high_n_u8 (uint8x16_t __a, const int __b)
22511 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
22514 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22515 vshll_high_n_u16 (uint16x8_t __a, const int __b)
22517 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
22520 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22521 vshll_high_n_u32 (uint32x4_t __a, const int __b)
22523 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
22526 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22527 vshll_n_s8 (int8x8_t __a, const int __b)
22529 return __builtin_aarch64_sshll_nv8qi (__a, __b);
22532 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22533 vshll_n_s16 (int16x4_t __a, const int __b)
22535 return __builtin_aarch64_sshll_nv4hi (__a, __b);
22538 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22539 vshll_n_s32 (int32x2_t __a, const int __b)
22541 return __builtin_aarch64_sshll_nv2si (__a, __b);
22544 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22545 vshll_n_u8 (uint8x8_t __a, const int __b)
22547 return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
22550 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22551 vshll_n_u16 (uint16x4_t __a, const int __b)
22553 return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
22556 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22557 vshll_n_u32 (uint32x2_t __a, const int __b)
22559 return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
22562 /* vshr */
22564 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22565 vshr_n_s8 (int8x8_t __a, const int __b)
22567 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
22570 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22571 vshr_n_s16 (int16x4_t __a, const int __b)
22573 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
22576 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22577 vshr_n_s32 (int32x2_t __a, const int __b)
22579 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
22582 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22583 vshr_n_s64 (int64x1_t __a, const int __b)
22585 return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
22588 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22589 vshr_n_u8 (uint8x8_t __a, const int __b)
22591 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
22594 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22595 vshr_n_u16 (uint16x4_t __a, const int __b)
22597 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
22600 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22601 vshr_n_u32 (uint32x2_t __a, const int __b)
22603 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
22606 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22607 vshr_n_u64 (uint64x1_t __a, const int __b)
22609 return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
22612 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22613 vshrq_n_s8 (int8x16_t __a, const int __b)
22615 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
22618 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22619 vshrq_n_s16 (int16x8_t __a, const int __b)
22621 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
22624 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22625 vshrq_n_s32 (int32x4_t __a, const int __b)
22627 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
22630 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22631 vshrq_n_s64 (int64x2_t __a, const int __b)
22633 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
22636 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22637 vshrq_n_u8 (uint8x16_t __a, const int __b)
22639 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
22642 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22643 vshrq_n_u16 (uint16x8_t __a, const int __b)
22645 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
22648 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22649 vshrq_n_u32 (uint32x4_t __a, const int __b)
22651 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
22654 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22655 vshrq_n_u64 (uint64x2_t __a, const int __b)
22657 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
22660 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22661 vshrd_n_s64 (int64_t __a, const int __b)
22663 return __builtin_aarch64_ashr_simddi (__a, __b);
22666 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22667 vshrd_n_u64 (uint64_t __a, const int __b)
22669 return __builtin_aarch64_lshr_simddi_uus (__a, __b);
22672 /* vsli */
22674 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22675 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22677 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
22680 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22681 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22683 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
22686 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22687 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22689 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
22692 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22693 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22695 return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
22698 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22699 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22701 return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
22704 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22705 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22707 return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
22710 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22711 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22713 return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
22716 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22717 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22719 return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
22722 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22723 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22725 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
22728 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22729 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22731 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
22734 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22735 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22737 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
22740 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22741 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22743 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
22746 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22747 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22749 return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
22752 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22753 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22755 return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
22758 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22759 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22761 return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
22764 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22765 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22767 return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
22770 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22771 vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
22773 return __builtin_aarch64_ssli_ndi (__a, __b, __c);
22776 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22777 vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22779 return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
22782 /* vsqadd */
22784 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22785 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
22787 return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
22790 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22791 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
22793 return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
22796 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22797 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
22799 return __builtin_aarch64_usqaddv2si_uus (__a, __b);
22802 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22803 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
22805 return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
22808 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22809 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
22811 return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
22814 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22815 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
22817 return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
22820 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22821 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
22823 return __builtin_aarch64_usqaddv4si_uus (__a, __b);
22826 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22827 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
22829 return __builtin_aarch64_usqaddv2di_uus (__a, __b);
22832 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
22833 vsqaddb_u8 (uint8_t __a, int8_t __b)
22835 return __builtin_aarch64_usqaddqi_uus (__a, __b);
22838 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
22839 vsqaddh_u16 (uint16_t __a, int16_t __b)
22841 return __builtin_aarch64_usqaddhi_uus (__a, __b);
22844 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
22845 vsqadds_u32 (uint32_t __a, int32_t __b)
22847 return __builtin_aarch64_usqaddsi_uus (__a, __b);
22850 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22851 vsqaddd_u64 (uint64_t __a, int64_t __b)
22853 return __builtin_aarch64_usqadddi_uus (__a, __b);
22856 /* vsqrt */
22857 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22858 vsqrt_f32 (float32x2_t a)
22860 return __builtin_aarch64_sqrtv2sf (a);
22863 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22864 vsqrtq_f32 (float32x4_t a)
22866 return __builtin_aarch64_sqrtv4sf (a);
22869 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22870 vsqrtq_f64 (float64x2_t a)
22872 return __builtin_aarch64_sqrtv2df (a);
22875 /* vsra */
22877 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22878 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22880 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
22883 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22884 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22886 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
22889 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22890 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22892 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
22895 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22896 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22898 return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
22901 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22902 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22904 return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
22907 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22908 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22910 return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
22913 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22914 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22916 return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
22919 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22920 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22922 return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
22925 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22926 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22928 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
22931 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22932 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22934 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
22937 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22938 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22940 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
22943 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22944 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22946 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
22949 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22950 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22952 return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
22955 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22956 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22958 return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
22961 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22962 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22964 return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
22967 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22968 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22970 return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
22973 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22974 vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
22976 return __builtin_aarch64_ssra_ndi (__a, __b, __c);
22979 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22980 vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22982 return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
22985 /* vsri */
22987 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22988 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22990 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
22993 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22994 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22996 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
22999 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23000 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23002 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
23005 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23006 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23008 return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
23011 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23012 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23014 return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
23017 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23018 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23020 return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
23023 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23024 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23026 return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
23029 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23030 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23032 return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
23035 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23036 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23038 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
23041 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23042 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23044 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
23047 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23048 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23050 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
23053 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23054 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23056 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
23059 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23060 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23062 return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
23065 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23066 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23068 return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
23071 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23072 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23074 return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
23077 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23078 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23080 return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
23083 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
23084 vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
23086 return __builtin_aarch64_ssri_ndi (__a, __b, __c);
23089 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
23090 vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
23092 return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
23095 /* vst1 */
23097 __extension__ static __inline void __attribute__ ((__always_inline__))
23098 vst1_f32 (float32_t *a, float32x2_t b)
23100 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
23103 __extension__ static __inline void __attribute__ ((__always_inline__))
23104 vst1_f64 (float64_t *a, float64x1_t b)
23106 *a = b[0];
23109 __extension__ static __inline void __attribute__ ((__always_inline__))
23110 vst1_p8 (poly8_t *a, poly8x8_t b)
23112 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23113 (int8x8_t) b);
23116 __extension__ static __inline void __attribute__ ((__always_inline__))
23117 vst1_p16 (poly16_t *a, poly16x4_t b)
23119 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23120 (int16x4_t) b);
23123 __extension__ static __inline void __attribute__ ((__always_inline__))
23124 vst1_s8 (int8_t *a, int8x8_t b)
23126 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
23129 __extension__ static __inline void __attribute__ ((__always_inline__))
23130 vst1_s16 (int16_t *a, int16x4_t b)
23132 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
23135 __extension__ static __inline void __attribute__ ((__always_inline__))
23136 vst1_s32 (int32_t *a, int32x2_t b)
23138 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
23141 __extension__ static __inline void __attribute__ ((__always_inline__))
23142 vst1_s64 (int64_t *a, int64x1_t b)
23144 *a = b[0];
23147 __extension__ static __inline void __attribute__ ((__always_inline__))
23148 vst1_u8 (uint8_t *a, uint8x8_t b)
23150 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23151 (int8x8_t) b);
23154 __extension__ static __inline void __attribute__ ((__always_inline__))
23155 vst1_u16 (uint16_t *a, uint16x4_t b)
23157 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23158 (int16x4_t) b);
23161 __extension__ static __inline void __attribute__ ((__always_inline__))
23162 vst1_u32 (uint32_t *a, uint32x2_t b)
23164 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
23165 (int32x2_t) b);
23168 __extension__ static __inline void __attribute__ ((__always_inline__))
23169 vst1_u64 (uint64_t *a, uint64x1_t b)
23171 *a = b[0];
23174 __extension__ static __inline void __attribute__ ((__always_inline__))
23175 vst1q_f32 (float32_t *a, float32x4_t b)
23177 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
23180 __extension__ static __inline void __attribute__ ((__always_inline__))
23181 vst1q_f64 (float64_t *a, float64x2_t b)
23183 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
23186 /* vst1q */
23188 __extension__ static __inline void __attribute__ ((__always_inline__))
23189 vst1q_p8 (poly8_t *a, poly8x16_t b)
23191 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23192 (int8x16_t) b);
23195 __extension__ static __inline void __attribute__ ((__always_inline__))
23196 vst1q_p16 (poly16_t *a, poly16x8_t b)
23198 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23199 (int16x8_t) b);
23202 __extension__ static __inline void __attribute__ ((__always_inline__))
23203 vst1q_s8 (int8_t *a, int8x16_t b)
23205 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
23208 __extension__ static __inline void __attribute__ ((__always_inline__))
23209 vst1q_s16 (int16_t *a, int16x8_t b)
23211 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
23214 __extension__ static __inline void __attribute__ ((__always_inline__))
23215 vst1q_s32 (int32_t *a, int32x4_t b)
23217 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
23220 __extension__ static __inline void __attribute__ ((__always_inline__))
23221 vst1q_s64 (int64_t *a, int64x2_t b)
23223 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
23226 __extension__ static __inline void __attribute__ ((__always_inline__))
23227 vst1q_u8 (uint8_t *a, uint8x16_t b)
23229 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23230 (int8x16_t) b);
23233 __extension__ static __inline void __attribute__ ((__always_inline__))
23234 vst1q_u16 (uint16_t *a, uint16x8_t b)
23236 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23237 (int16x8_t) b);
23240 __extension__ static __inline void __attribute__ ((__always_inline__))
23241 vst1q_u32 (uint32_t *a, uint32x4_t b)
23243 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
23244 (int32x4_t) b);
23247 __extension__ static __inline void __attribute__ ((__always_inline__))
23248 vst1q_u64 (uint64_t *a, uint64x2_t b)
23250 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
23251 (int64x2_t) b);
23254 /* vstn */
23256 __extension__ static __inline void
23257 vst2_s64 (int64_t * __a, int64x1x2_t val)
23259 __builtin_aarch64_simd_oi __o;
23260 int64x2x2_t temp;
23261 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23262 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23263 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23264 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23265 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
23268 __extension__ static __inline void
23269 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
23271 __builtin_aarch64_simd_oi __o;
23272 uint64x2x2_t temp;
23273 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23274 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23275 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23276 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23277 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
23280 __extension__ static __inline void
23281 vst2_f64 (float64_t * __a, float64x1x2_t val)
23283 __builtin_aarch64_simd_oi __o;
23284 float64x2x2_t temp;
23285 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23286 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23287 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
23288 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
23289 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
23292 __extension__ static __inline void
23293 vst2_s8 (int8_t * __a, int8x8x2_t val)
23295 __builtin_aarch64_simd_oi __o;
23296 int8x16x2_t temp;
23297 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23298 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23299 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23300 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23301 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23304 __extension__ static __inline void __attribute__ ((__always_inline__))
23305 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
23307 __builtin_aarch64_simd_oi __o;
23308 poly8x16x2_t temp;
23309 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23310 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23311 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23312 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23313 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23316 __extension__ static __inline void __attribute__ ((__always_inline__))
23317 vst2_s16 (int16_t * __a, int16x4x2_t val)
23319 __builtin_aarch64_simd_oi __o;
23320 int16x8x2_t temp;
23321 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23322 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23323 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23324 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23325 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23328 __extension__ static __inline void __attribute__ ((__always_inline__))
23329 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
23331 __builtin_aarch64_simd_oi __o;
23332 poly16x8x2_t temp;
23333 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23334 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23335 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23336 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23337 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23340 __extension__ static __inline void __attribute__ ((__always_inline__))
23341 vst2_s32 (int32_t * __a, int32x2x2_t val)
23343 __builtin_aarch64_simd_oi __o;
23344 int32x4x2_t temp;
23345 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23346 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23347 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
23348 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
23349 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
23352 __extension__ static __inline void __attribute__ ((__always_inline__))
23353 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
23355 __builtin_aarch64_simd_oi __o;
23356 uint8x16x2_t temp;
23357 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23358 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23359 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23360 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23361 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23364 __extension__ static __inline void __attribute__ ((__always_inline__))
23365 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
23367 __builtin_aarch64_simd_oi __o;
23368 uint16x8x2_t temp;
23369 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23370 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23371 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23372 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23373 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23376 __extension__ static __inline void __attribute__ ((__always_inline__))
23377 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
23379 __builtin_aarch64_simd_oi __o;
23380 uint32x4x2_t temp;
23381 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23382 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23383 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
23384 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
23385 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
23388 __extension__ static __inline void __attribute__ ((__always_inline__))
23389 vst2_f32 (float32_t * __a, float32x2x2_t val)
23391 __builtin_aarch64_simd_oi __o;
23392 float32x4x2_t temp;
23393 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23394 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23395 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
23396 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
23397 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23400 __extension__ static __inline void __attribute__ ((__always_inline__))
23401 vst2q_s8 (int8_t * __a, int8x16x2_t val)
23403 __builtin_aarch64_simd_oi __o;
23404 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23405 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23406 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23409 __extension__ static __inline void __attribute__ ((__always_inline__))
23410 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
23412 __builtin_aarch64_simd_oi __o;
23413 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23414 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23415 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23418 __extension__ static __inline void __attribute__ ((__always_inline__))
23419 vst2q_s16 (int16_t * __a, int16x8x2_t val)
23421 __builtin_aarch64_simd_oi __o;
23422 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23423 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23424 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23427 __extension__ static __inline void __attribute__ ((__always_inline__))
23428 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
23430 __builtin_aarch64_simd_oi __o;
23431 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23432 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23433 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23436 __extension__ static __inline void __attribute__ ((__always_inline__))
23437 vst2q_s32 (int32_t * __a, int32x4x2_t val)
23439 __builtin_aarch64_simd_oi __o;
23440 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
23441 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
23442 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
23445 __extension__ static __inline void __attribute__ ((__always_inline__))
23446 vst2q_s64 (int64_t * __a, int64x2x2_t val)
23448 __builtin_aarch64_simd_oi __o;
23449 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
23450 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
23451 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
23454 __extension__ static __inline void __attribute__ ((__always_inline__))
23455 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
23457 __builtin_aarch64_simd_oi __o;
23458 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23459 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23460 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23463 __extension__ static __inline void __attribute__ ((__always_inline__))
23464 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
23466 __builtin_aarch64_simd_oi __o;
23467 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23468 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23469 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23472 __extension__ static __inline void __attribute__ ((__always_inline__))
23473 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
23475 __builtin_aarch64_simd_oi __o;
23476 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
23477 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
23478 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
23481 __extension__ static __inline void __attribute__ ((__always_inline__))
23482 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
23484 __builtin_aarch64_simd_oi __o;
23485 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
23486 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
23487 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
23490 __extension__ static __inline void __attribute__ ((__always_inline__))
23491 vst2q_f32 (float32_t * __a, float32x4x2_t val)
23493 __builtin_aarch64_simd_oi __o;
23494 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
23495 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
23496 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23499 __extension__ static __inline void __attribute__ ((__always_inline__))
23500 vst2q_f64 (float64_t * __a, float64x2x2_t val)
23502 __builtin_aarch64_simd_oi __o;
23503 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
23504 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
23505 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
23508 __extension__ static __inline void
23509 vst3_s64 (int64_t * __a, int64x1x3_t val)
23511 __builtin_aarch64_simd_ci __o;
23512 int64x2x3_t temp;
23513 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23514 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23515 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23516 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
23517 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
23518 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
23519 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
23522 __extension__ static __inline void
23523 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
23525 __builtin_aarch64_simd_ci __o;
23526 uint64x2x3_t temp;
23527 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23528 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23529 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23530 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
23531 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
23532 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
23533 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
23536 __extension__ static __inline void
23537 vst3_f64 (float64_t * __a, float64x1x3_t val)
23539 __builtin_aarch64_simd_ci __o;
23540 float64x2x3_t temp;
23541 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23542 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23543 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23544 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
23545 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
23546 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
23547 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
23550 __extension__ static __inline void
23551 vst3_s8 (int8_t * __a, int8x8x3_t val)
23553 __builtin_aarch64_simd_ci __o;
23554 int8x16x3_t temp;
23555 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23556 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23557 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23558 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23559 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23560 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23561 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23564 __extension__ static __inline void __attribute__ ((__always_inline__))
23565 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
23567 __builtin_aarch64_simd_ci __o;
23568 poly8x16x3_t temp;
23569 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23570 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23571 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23572 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23573 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23574 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23575 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23578 __extension__ static __inline void __attribute__ ((__always_inline__))
23579 vst3_s16 (int16_t * __a, int16x4x3_t val)
23581 __builtin_aarch64_simd_ci __o;
23582 int16x8x3_t temp;
23583 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23584 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23585 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23586 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23587 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23588 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23589 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23592 __extension__ static __inline void __attribute__ ((__always_inline__))
23593 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
23595 __builtin_aarch64_simd_ci __o;
23596 poly16x8x3_t temp;
23597 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23598 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23599 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23600 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23601 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23602 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23603 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23606 __extension__ static __inline void __attribute__ ((__always_inline__))
23607 vst3_s32 (int32_t * __a, int32x2x3_t val)
23609 __builtin_aarch64_simd_ci __o;
23610 int32x4x3_t temp;
23611 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23612 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23613 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23614 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23615 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23616 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23617 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23620 __extension__ static __inline void __attribute__ ((__always_inline__))
23621 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
23623 __builtin_aarch64_simd_ci __o;
23624 uint8x16x3_t temp;
23625 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23626 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23627 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23628 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23629 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23630 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23631 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23634 __extension__ static __inline void __attribute__ ((__always_inline__))
23635 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
23637 __builtin_aarch64_simd_ci __o;
23638 uint16x8x3_t temp;
23639 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23640 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23641 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23642 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23643 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23644 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23645 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23648 __extension__ static __inline void __attribute__ ((__always_inline__))
23649 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
23651 __builtin_aarch64_simd_ci __o;
23652 uint32x4x3_t temp;
23653 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23654 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23655 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23656 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23657 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23658 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23659 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23662 __extension__ static __inline void __attribute__ ((__always_inline__))
23663 vst3_f32 (float32_t * __a, float32x2x3_t val)
23665 __builtin_aarch64_simd_ci __o;
23666 float32x4x3_t temp;
23667 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23668 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23669 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23670 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
23671 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
23672 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23673 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
/* Store three 16-lane int8 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s8 (int8_t * __a, int8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store three 16-lane poly8 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  /* Poly lanes are reinterpreted as signed ints for the register-set
     builtin; the bit pattern is unchanged.  */
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store three 8-lane int16 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s16 (int16_t * __a, int16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store three 8-lane poly16 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store three 4-lane int32 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s32 (int32_t * __a, int32x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
/* Store three 2-lane int64 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s64 (int64_t * __a, int64x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
/* Store three 16-lane uint8 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store three 8-lane uint16 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store three 4-lane uint32 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
/* Store three 2-lane uint64 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
/* Store three 4-lane float32 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
/* Store three 2-lane float64 vectors from VAL to __A via the st3 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f64 (float64_t * __a, float64x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
23796 __extension__ static __inline void
23797 vst4_s64 (int64_t * __a, int64x1x4_t val)
23799 __builtin_aarch64_simd_xi __o;
23800 int64x2x4_t temp;
23801 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23802 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23803 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23804 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23805 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23806 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23807 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23808 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23809 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23812 __extension__ static __inline void
23813 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23815 __builtin_aarch64_simd_xi __o;
23816 uint64x2x4_t temp;
23817 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23818 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23819 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23820 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23821 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23822 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23823 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23824 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23825 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23828 __extension__ static __inline void
23829 vst4_f64 (float64_t * __a, float64x1x4_t val)
23831 __builtin_aarch64_simd_xi __o;
23832 float64x2x4_t temp;
23833 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23834 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23835 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23836 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23837 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23838 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23839 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23840 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23841 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
23844 __extension__ static __inline void
23845 vst4_s8 (int8_t * __a, int8x8x4_t val)
23847 __builtin_aarch64_simd_xi __o;
23848 int8x16x4_t temp;
23849 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23850 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23851 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23852 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23853 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23854 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23855 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23856 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23857 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Store four 8-lane poly8 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p8 (poly8_t * __a, poly8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly8x16x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store four 4-lane int16 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s16 (int16_t * __a, int16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int16x8x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store four 4-lane poly16 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p16 (poly16_t * __a, poly16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly16x8x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store four 2-lane int32 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s32 (int32_t * __a, int32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int32x4x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
/* Store four 8-lane uint8 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u8 (uint8_t * __a, uint8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint8x16x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store four 4-lane uint16 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u16 (uint16_t * __a, uint16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint16x8x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store four 2-lane uint32 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u32 (uint32_t * __a, uint32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint32x4x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
/* Store four 2-lane float32 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float32x4x4_t temp;
  /* Widen each 64-bit value to 128 bits (upper half zeroed) before
     building the XI register tuple.  */
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
/* Store four 16-lane int8 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store four 16-lane poly8 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store four 8-lane int16 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s16 (int16_t * __a, int16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store four 8-lane poly16 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store four 4-lane int32 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
/* Store four 2-lane int64 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
/* Store four 16-lane uint8 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
/* Store four 8-lane uint16 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* Store four 4-lane uint32 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
/* Store four 2-lane uint64 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
/* Store four 4-lane float32 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
/* Store four 2-lane float64 vectors from VAL to __A via the st4 builtin.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f64 (float64_t * __a, float64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
24120 /* vsub */
/* Scalar subtract of two signed 64-bit values: __a - __b.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsubd_s64 (int64_t __a, int64_t __b)
{
  int64_t __diff = __a - __b;
  return __diff;
}
/* Scalar subtract of two unsigned 64-bit values: __a - __b
   (wraps modulo 2^64).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64_t __a, uint64_t __b)
{
  uint64_t __diff = __a - __b;
  return __diff;
}
24134 /* vtbx1 */
/* Single-register table lookup with fallback: lanes whose index byte is
   in [0, 8) take the vtbl1 result, all other lanes keep __r.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (8));
  int8x8_t __tbl = vtbl1_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}
/* Single-register table lookup with fallback: lanes whose index byte is
   in [0, 8) take the vtbl1 result, all other lanes keep __r.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}
/* Single-register table lookup with fallback: lanes whose index byte is
   in [0, 8) take the vtbl1 result, all other lanes keep __r.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
24164 /* vtbx3 */
/* Three-register table lookup with fallback: lanes whose index byte is
   in [0, 24) take the vtbl3 result, all other lanes keep __r.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (24));
  int8x8_t __tbl = vtbl3_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}
/* Three-register table lookup with fallback: lanes whose index byte is
   in [0, 24) take the vtbl3 result, all other lanes keep __r.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}
/* Three-register table lookup with fallback: lanes whose index byte is
   in [0, 24) take the vtbl3 result, all other lanes keep __r.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
24194 /* vtrn */
/* TRN1: interleave the even-numbered lanes of __a and __b.
   Lane numbering is reversed on big-endian, hence the alternate
   shuffle indices under __AARCH64EB__.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}
/* TRN1: interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}
/* TRN1 (128-bit): interleave the even-numbered lanes of __a and __b
   (indices reversed for big-endian lane numbering).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}
24402 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24403 vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
24405 #ifdef __AARCH64EB__
24406 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24407 #else
24408 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24409 #endif
24412 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24413 vtrn2_f32 (float32x2_t __a, float32x2_t __b)
24415 #ifdef __AARCH64EB__
24416 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24417 #else
24418 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24419 #endif
24422 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24423 vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
24425 #ifdef __AARCH64EB__
24426 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24427 #else
24428 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24429 #endif
24432 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24433 vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
24435 #ifdef __AARCH64EB__
24436 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24437 #else
24438 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24439 #endif
24442 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24443 vtrn2_s8 (int8x8_t __a, int8x8_t __b)
24445 #ifdef __AARCH64EB__
24446 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24447 #else
24448 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24449 #endif
24452 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24453 vtrn2_s16 (int16x4_t __a, int16x4_t __b)
24455 #ifdef __AARCH64EB__
24456 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24457 #else
24458 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24459 #endif
24462 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24463 vtrn2_s32 (int32x2_t __a, int32x2_t __b)
24465 #ifdef __AARCH64EB__
24466 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24467 #else
24468 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24469 #endif
24472 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24473 vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
24475 #ifdef __AARCH64EB__
24476 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24477 #else
24478 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24479 #endif
24482 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24483 vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
24485 #ifdef __AARCH64EB__
24486 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24487 #else
24488 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24489 #endif
24492 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24493 vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
24495 #ifdef __AARCH64EB__
24496 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24497 #else
24498 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24499 #endif
24502 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24503 vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
24505 #ifdef __AARCH64EB__
24506 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24507 #else
24508 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24509 #endif
24512 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24513 vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
24515 #ifdef __AARCH64EB__
24516 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24517 #else
24518 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24519 #endif
24522 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24523 vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
24525 #ifdef __AARCH64EB__
24526 return __builtin_shuffle (__a, __b,
24527 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24528 #else
24529 return __builtin_shuffle (__a, __b,
24530 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24531 #endif
24534 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24535 vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
24537 #ifdef __AARCH64EB__
24538 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24539 #else
24540 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24541 #endif
24544 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24545 vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
24547 #ifdef __AARCH64EB__
24548 return __builtin_shuffle (__a, __b,
24549 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24550 #else
24551 return __builtin_shuffle (__a, __b,
24552 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24553 #endif
24556 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24557 vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
24559 #ifdef __AARCH64EB__
24560 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24561 #else
24562 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24563 #endif
24566 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24567 vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
24569 #ifdef __AARCH64EB__
24570 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24571 #else
24572 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24573 #endif
24576 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24577 vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
24579 #ifdef __AARCH64EB__
24580 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24581 #else
24582 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24583 #endif
24586 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24587 vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
24589 #ifdef __AARCH64EB__
24590 return __builtin_shuffle (__a, __b,
24591 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24592 #else
24593 return __builtin_shuffle (__a, __b,
24594 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24595 #endif
24598 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24599 vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
24601 #ifdef __AARCH64EB__
24602 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24603 #else
24604 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24605 #endif
24608 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24609 vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
24611 #ifdef __AARCH64EB__
24612 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24613 #else
24614 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24615 #endif
24618 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24619 vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
24621 #ifdef __AARCH64EB__
24622 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24623 #else
24624 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24625 #endif
24628 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
24629 vtrn_f32 (float32x2_t a, float32x2_t b)
24631 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
24634 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
24635 vtrn_p8 (poly8x8_t a, poly8x8_t b)
24637 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
24640 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
24641 vtrn_p16 (poly16x4_t a, poly16x4_t b)
24643 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
24646 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
24647 vtrn_s8 (int8x8_t a, int8x8_t b)
24649 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
24652 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
24653 vtrn_s16 (int16x4_t a, int16x4_t b)
24655 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
24658 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
24659 vtrn_s32 (int32x2_t a, int32x2_t b)
24661 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
24664 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
24665 vtrn_u8 (uint8x8_t a, uint8x8_t b)
24667 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
24670 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
24671 vtrn_u16 (uint16x4_t a, uint16x4_t b)
24673 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
24676 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
24677 vtrn_u32 (uint32x2_t a, uint32x2_t b)
24679 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
24682 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
24683 vtrnq_f32 (float32x4_t a, float32x4_t b)
24685 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
24688 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
24689 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
24691 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
24694 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
24695 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
24697 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
24700 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
24701 vtrnq_s8 (int8x16_t a, int8x16_t b)
24703 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
24706 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
24707 vtrnq_s16 (int16x8_t a, int16x8_t b)
24709 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
24712 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
24713 vtrnq_s32 (int32x4_t a, int32x4_t b)
24715 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
24718 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
24719 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
24721 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
24724 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
24725 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
24727 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
24730 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
24731 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
24733 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
24736 /* vtst */
24738 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24739 vtst_s8 (int8x8_t __a, int8x8_t __b)
24741 return (uint8x8_t) ((__a & __b) != 0);
24744 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24745 vtst_s16 (int16x4_t __a, int16x4_t __b)
24747 return (uint16x4_t) ((__a & __b) != 0);
24750 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24751 vtst_s32 (int32x2_t __a, int32x2_t __b)
24753 return (uint32x2_t) ((__a & __b) != 0);
24756 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24757 vtst_s64 (int64x1_t __a, int64x1_t __b)
24759 return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
24762 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24763 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
24765 return ((__a & __b) != 0);
24768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24769 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
24771 return ((__a & __b) != 0);
24774 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24775 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
24777 return ((__a & __b) != 0);
24780 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24781 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
24783 return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
24786 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24787 vtstq_s8 (int8x16_t __a, int8x16_t __b)
24789 return (uint8x16_t) ((__a & __b) != 0);
24792 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24793 vtstq_s16 (int16x8_t __a, int16x8_t __b)
24795 return (uint16x8_t) ((__a & __b) != 0);
24798 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24799 vtstq_s32 (int32x4_t __a, int32x4_t __b)
24801 return (uint32x4_t) ((__a & __b) != 0);
24804 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24805 vtstq_s64 (int64x2_t __a, int64x2_t __b)
24807 return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
24810 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24811 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
24813 return ((__a & __b) != 0);
24816 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24817 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
24819 return ((__a & __b) != 0);
24822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24823 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
24825 return ((__a & __b) != 0);
24828 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24829 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
24831 return ((__a & __b) != __AARCH64_UINT64_C (0));
/* vtstd: scalar (D-register) bit test.  Returns all-ones (UINT64_MAX)
   when __a and __b share at least one set bit, otherwise zero.  */

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_s64 (int64_t __a, int64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64_t __a, uint64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}
24846 /* vuqadd */
24848 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24849 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
24851 return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
24854 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24855 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
24857 return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
24860 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24861 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
24863 return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
24866 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24867 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
24869 return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
24872 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24873 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
24875 return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
24878 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24879 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
24881 return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
24884 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24885 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
24887 return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
24890 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24891 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
24893 return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
24896 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
24897 vuqaddb_s8 (int8_t __a, uint8_t __b)
24899 return __builtin_aarch64_suqaddqi_ssu (__a, __b);
24902 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
24903 vuqaddh_s16 (int16_t __a, uint16_t __b)
24905 return __builtin_aarch64_suqaddhi_ssu (__a, __b);
24908 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
24909 vuqadds_s32 (int32_t __a, uint32_t __b)
24911 return __builtin_aarch64_suqaddsi_ssu (__a, __b);
24914 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
24915 vuqaddd_s64 (int64_t __a, uint64_t __b)
24917 return __builtin_aarch64_suqadddi_ssu (__a, __b);
/* __DEFINTERLEAVE defines the pairwise interleave combiner
   v<op>[q]_<suffix>, which returns both the v<op>1 and v<op>2 halves
   as an x2 structure.  __INTERLEAVE_LIST instantiates it for every
   supported element type, in both 64-bit and 128-bit (Q) widths.  */

#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) 	\
  __extension__ static __inline rettype				\
  __attribute__ ((__always_inline__))				\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)		\
  {								\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
  }

#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
24949 /* vuzp */
24951 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24952 vuzp1_f32 (float32x2_t __a, float32x2_t __b)
24954 #ifdef __AARCH64EB__
24955 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24956 #else
24957 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24958 #endif
24961 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24962 vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
24964 #ifdef __AARCH64EB__
24965 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24966 #else
24967 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24968 #endif
24971 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24972 vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
24974 #ifdef __AARCH64EB__
24975 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24976 #else
24977 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24978 #endif
24981 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24982 vuzp1_s8 (int8x8_t __a, int8x8_t __b)
24984 #ifdef __AARCH64EB__
24985 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24986 #else
24987 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24988 #endif
24991 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24992 vuzp1_s16 (int16x4_t __a, int16x4_t __b)
24994 #ifdef __AARCH64EB__
24995 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24996 #else
24997 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24998 #endif
25001 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25002 vuzp1_s32 (int32x2_t __a, int32x2_t __b)
25004 #ifdef __AARCH64EB__
25005 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25006 #else
25007 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25008 #endif
25011 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25012 vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
25014 #ifdef __AARCH64EB__
25015 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25016 #else
25017 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25018 #endif
25021 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25022 vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
25024 #ifdef __AARCH64EB__
25025 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
25026 #else
25027 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
25028 #endif
25031 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25032 vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
25034 #ifdef __AARCH64EB__
25035 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25036 #else
25037 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25038 #endif
25041 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25042 vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
25044 #ifdef __AARCH64EB__
25045 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25046 #else
25047 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25048 #endif
25051 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25052 vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
25054 #ifdef __AARCH64EB__
25055 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25056 #else
25057 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25058 #endif
25061 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25062 vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
25064 #ifdef __AARCH64EB__
25065 return __builtin_shuffle (__a, __b, (uint8x16_t)
25066 {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25067 #else
25068 return __builtin_shuffle (__a, __b, (uint8x16_t)
25069 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25070 #endif
25073 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25074 vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
25076 #ifdef __AARCH64EB__
25077 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25078 #else
25079 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25080 #endif
25083 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25084 vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
25086 #ifdef __AARCH64EB__
25087 return __builtin_shuffle (__a, __b,
25088 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25089 #else
25090 return __builtin_shuffle (__a, __b,
25091 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25092 #endif
25095 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25096 vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
25098 #ifdef __AARCH64EB__
25099 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25100 #else
25101 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25102 #endif
25105 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25106 vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
25108 #ifdef __AARCH64EB__
25109 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25110 #else
25111 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25112 #endif
25115 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25116 vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
25118 #ifdef __AARCH64EB__
25119 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25120 #else
25121 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25122 #endif
25125 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25126 vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
25128 #ifdef __AARCH64EB__
25129 return __builtin_shuffle (__a, __b,
25130 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25131 #else
25132 return __builtin_shuffle (__a, __b,
25133 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25134 #endif
25137 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25138 vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
25140 #ifdef __AARCH64EB__
25141 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25142 #else
25143 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25144 #endif
25147 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25148 vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
25150 #ifdef __AARCH64EB__
25151 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25152 #else
25153 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25154 #endif
25157 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25158 vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
25160 #ifdef __AARCH64EB__
25161 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25162 #else
25163 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25164 #endif
25167 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25168 vuzp2_f32 (float32x2_t __a, float32x2_t __b)
25170 #ifdef __AARCH64EB__
25171 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25172 #else
25173 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25174 #endif
25177 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25178 vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
25180 #ifdef __AARCH64EB__
25181 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25182 #else
25183 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25184 #endif
25187 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25188 vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
25190 #ifdef __AARCH64EB__
25191 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25192 #else
25193 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25194 #endif
25197 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25198 vuzp2_s8 (int8x8_t __a, int8x8_t __b)
25200 #ifdef __AARCH64EB__
25201 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25202 #else
25203 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25204 #endif
25207 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25208 vuzp2_s16 (int16x4_t __a, int16x4_t __b)
25210 #ifdef __AARCH64EB__
25211 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25212 #else
25213 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25214 #endif
25217 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25218 vuzp2_s32 (int32x2_t __a, int32x2_t __b)
25220 #ifdef __AARCH64EB__
25221 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25222 #else
25223 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25224 #endif
25227 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25228 vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
25230 #ifdef __AARCH64EB__
25231 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25232 #else
25233 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25234 #endif
25237 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25238 vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
25240 #ifdef __AARCH64EB__
25241 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25242 #else
25243 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25244 #endif
25247 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25248 vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
25250 #ifdef __AARCH64EB__
25251 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25252 #else
25253 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25254 #endif
25257 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25258 vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
25260 #ifdef __AARCH64EB__
25261 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25262 #else
25263 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25264 #endif
25267 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25268 vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
25270 #ifdef __AARCH64EB__
25271 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25272 #else
25273 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25274 #endif
25277 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25278 vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
25280 #ifdef __AARCH64EB__
25281 return __builtin_shuffle (__a, __b,
25282 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25283 #else
25284 return __builtin_shuffle (__a, __b,
25285 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25286 #endif
25289 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25290 vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
25292 #ifdef __AARCH64EB__
25293 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25294 #else
25295 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25296 #endif
25299 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25300 vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
25302 #ifdef __AARCH64EB__
25303 return __builtin_shuffle (__a, __b,
25304 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25305 #else
25306 return __builtin_shuffle (__a, __b,
25307 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25308 #endif
25311 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25312 vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
25314 #ifdef __AARCH64EB__
25315 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25316 #else
25317 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25318 #endif
25321 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25322 vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
25324 #ifdef __AARCH64EB__
25325 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25326 #else
25327 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25328 #endif
25331 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25332 vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
25334 #ifdef __AARCH64EB__
25335 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25336 #else
25337 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25338 #endif
25341 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25342 vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
25344 #ifdef __AARCH64EB__
25345 return __builtin_shuffle (__a, __b, (uint8x16_t)
25346 {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25347 #else
25348 return __builtin_shuffle (__a, __b, (uint8x16_t)
25349 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25350 #endif
25353 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25354 vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
25356 #ifdef __AARCH64EB__
25357 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25358 #else
25359 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25360 #endif
25363 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25364 vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
25366 #ifdef __AARCH64EB__
25367 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25368 #else
25369 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25370 #endif
25373 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25374 vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
25376 #ifdef __AARCH64EB__
25377 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25378 #else
25379 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25380 #endif
25383 __INTERLEAVE_LIST (uzp)
/* vzip */
25387 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25388 vzip1_f32 (float32x2_t __a, float32x2_t __b)
25390 #ifdef __AARCH64EB__
25391 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25392 #else
25393 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25394 #endif
25397 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25398 vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
25400 #ifdef __AARCH64EB__
25401 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25402 #else
25403 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25404 #endif
25407 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25408 vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
25410 #ifdef __AARCH64EB__
25411 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25412 #else
25413 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25414 #endif
25417 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25418 vzip1_s8 (int8x8_t __a, int8x8_t __b)
25420 #ifdef __AARCH64EB__
25421 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25422 #else
25423 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25424 #endif
25427 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25428 vzip1_s16 (int16x4_t __a, int16x4_t __b)
25430 #ifdef __AARCH64EB__
25431 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25432 #else
25433 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25434 #endif
25437 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25438 vzip1_s32 (int32x2_t __a, int32x2_t __b)
25440 #ifdef __AARCH64EB__
25441 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25442 #else
25443 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25444 #endif
25447 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25448 vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
25450 #ifdef __AARCH64EB__
25451 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25452 #else
25453 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25454 #endif
25457 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25458 vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
25460 #ifdef __AARCH64EB__
25461 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25462 #else
25463 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25464 #endif
25467 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25468 vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
25470 #ifdef __AARCH64EB__
25471 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25472 #else
25473 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25474 #endif
25477 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25478 vzip1q_f32 (float32x4_t __a, float32x4_t __b)
25480 #ifdef __AARCH64EB__
25481 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25482 #else
25483 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25484 #endif
25487 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25488 vzip1q_f64 (float64x2_t __a, float64x2_t __b)
25490 #ifdef __AARCH64EB__
25491 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25492 #else
25493 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25494 #endif
25497 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25498 vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
25500 #ifdef __AARCH64EB__
25501 return __builtin_shuffle (__a, __b, (uint8x16_t)
25502 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25503 #else
25504 return __builtin_shuffle (__a, __b, (uint8x16_t)
25505 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25506 #endif
25509 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25510 vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
25512 #ifdef __AARCH64EB__
25513 return __builtin_shuffle (__a, __b, (uint16x8_t)
25514 {12, 4, 13, 5, 14, 6, 15, 7});
25515 #else
25516 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25517 #endif
25520 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25521 vzip1q_s8 (int8x16_t __a, int8x16_t __b)
25523 #ifdef __AARCH64EB__
25524 return __builtin_shuffle (__a, __b, (uint8x16_t)
25525 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25526 #else
25527 return __builtin_shuffle (__a, __b, (uint8x16_t)
25528 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25529 #endif
25532 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25533 vzip1q_s16 (int16x8_t __a, int16x8_t __b)
25535 #ifdef __AARCH64EB__
25536 return __builtin_shuffle (__a, __b, (uint16x8_t)
25537 {12, 4, 13, 5, 14, 6, 15, 7});
25538 #else
25539 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25540 #endif
25543 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25544 vzip1q_s32 (int32x4_t __a, int32x4_t __b)
25546 #ifdef __AARCH64EB__
25547 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25548 #else
25549 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25550 #endif
25553 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25554 vzip1q_s64 (int64x2_t __a, int64x2_t __b)
25556 #ifdef __AARCH64EB__
25557 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25558 #else
25559 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25560 #endif
25563 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25564 vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
25566 #ifdef __AARCH64EB__
25567 return __builtin_shuffle (__a, __b, (uint8x16_t)
25568 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25569 #else
25570 return __builtin_shuffle (__a, __b, (uint8x16_t)
25571 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25572 #endif
25575 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25576 vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
25578 #ifdef __AARCH64EB__
25579 return __builtin_shuffle (__a, __b, (uint16x8_t)
25580 {12, 4, 13, 5, 14, 6, 15, 7});
25581 #else
25582 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25583 #endif
25586 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25587 vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
25589 #ifdef __AARCH64EB__
25590 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25591 #else
25592 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25593 #endif
25596 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25597 vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
25599 #ifdef __AARCH64EB__
25600 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25601 #else
25602 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25603 #endif
25606 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25607 vzip2_f32 (float32x2_t __a, float32x2_t __b)
25609 #ifdef __AARCH64EB__
25610 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25611 #else
25612 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25613 #endif
25616 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25617 vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
25619 #ifdef __AARCH64EB__
25620 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25621 #else
25622 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25623 #endif
25626 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25627 vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
25629 #ifdef __AARCH64EB__
25630 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25631 #else
25632 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25633 #endif
25636 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25637 vzip2_s8 (int8x8_t __a, int8x8_t __b)
25639 #ifdef __AARCH64EB__
25640 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25641 #else
25642 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25643 #endif
25646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25647 vzip2_s16 (int16x4_t __a, int16x4_t __b)
25649 #ifdef __AARCH64EB__
25650 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25651 #else
25652 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25653 #endif
25656 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25657 vzip2_s32 (int32x2_t __a, int32x2_t __b)
25659 #ifdef __AARCH64EB__
25660 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25661 #else
25662 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25663 #endif
25666 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25667 vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
25669 #ifdef __AARCH64EB__
25670 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25671 #else
25672 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25673 #endif
25676 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25677 vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
25679 #ifdef __AARCH64EB__
25680 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25681 #else
25682 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25683 #endif
25686 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25687 vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
25689 #ifdef __AARCH64EB__
25690 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25691 #else
25692 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25693 #endif
25696 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25697 vzip2q_f32 (float32x4_t __a, float32x4_t __b)
25699 #ifdef __AARCH64EB__
25700 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25701 #else
25702 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25703 #endif
25706 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25707 vzip2q_f64 (float64x2_t __a, float64x2_t __b)
25709 #ifdef __AARCH64EB__
25710 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25711 #else
25712 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25713 #endif
25716 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25717 vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
25719 #ifdef __AARCH64EB__
25720 return __builtin_shuffle (__a, __b, (uint8x16_t)
25721 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25722 #else
25723 return __builtin_shuffle (__a, __b, (uint8x16_t)
25724 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25725 #endif
25728 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25729 vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
25731 #ifdef __AARCH64EB__
25732 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25733 #else
25734 return __builtin_shuffle (__a, __b, (uint16x8_t)
25735 {4, 12, 5, 13, 6, 14, 7, 15});
25736 #endif
25739 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25740 vzip2q_s8 (int8x16_t __a, int8x16_t __b)
25742 #ifdef __AARCH64EB__
25743 return __builtin_shuffle (__a, __b, (uint8x16_t)
25744 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25745 #else
25746 return __builtin_shuffle (__a, __b, (uint8x16_t)
25747 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25748 #endif
25751 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25752 vzip2q_s16 (int16x8_t __a, int16x8_t __b)
25754 #ifdef __AARCH64EB__
25755 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25756 #else
25757 return __builtin_shuffle (__a, __b, (uint16x8_t)
25758 {4, 12, 5, 13, 6, 14, 7, 15});
25759 #endif
25762 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25763 vzip2q_s32 (int32x4_t __a, int32x4_t __b)
25765 #ifdef __AARCH64EB__
25766 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25767 #else
25768 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25769 #endif
25772 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25773 vzip2q_s64 (int64x2_t __a, int64x2_t __b)
25775 #ifdef __AARCH64EB__
25776 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25777 #else
25778 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25779 #endif
25782 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25783 vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
25785 #ifdef __AARCH64EB__
25786 return __builtin_shuffle (__a, __b, (uint8x16_t)
25787 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25788 #else
25789 return __builtin_shuffle (__a, __b, (uint8x16_t)
25790 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25791 #endif
25794 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25795 vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
25797 #ifdef __AARCH64EB__
25798 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25799 #else
25800 return __builtin_shuffle (__a, __b, (uint16x8_t)
25801 {4, 12, 5, 13, 6, 14, 7, 15});
25802 #endif
25805 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25806 vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
25808 #ifdef __AARCH64EB__
25809 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25810 #else
25811 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25812 #endif
25815 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25816 vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
25818 #ifdef __AARCH64EB__
25819 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25820 #else
25821 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25822 #endif
25825 __INTERLEAVE_LIST (zip)
25827 #undef __INTERLEAVE_LIST
25828 #undef __DEFINTERLEAVE
25830 /* End of optimal implementations in approved order. */
25832 #undef __aarch64_vget_lane_any
25833 #undef __aarch64_vget_lane_f32
25834 #undef __aarch64_vget_lane_f64
25835 #undef __aarch64_vget_lane_p8
25836 #undef __aarch64_vget_lane_p16
25837 #undef __aarch64_vget_lane_s8
25838 #undef __aarch64_vget_lane_s16
25839 #undef __aarch64_vget_lane_s32
25840 #undef __aarch64_vget_lane_s64
25841 #undef __aarch64_vget_lane_u8
25842 #undef __aarch64_vget_lane_u16
25843 #undef __aarch64_vget_lane_u32
25844 #undef __aarch64_vget_lane_u64
25846 #undef __aarch64_vgetq_lane_f32
25847 #undef __aarch64_vgetq_lane_f64
25848 #undef __aarch64_vgetq_lane_p8
25849 #undef __aarch64_vgetq_lane_p16
25850 #undef __aarch64_vgetq_lane_s8
25851 #undef __aarch64_vgetq_lane_s16
25852 #undef __aarch64_vgetq_lane_s32
25853 #undef __aarch64_vgetq_lane_s64
25854 #undef __aarch64_vgetq_lane_u8
25855 #undef __aarch64_vgetq_lane_u16
25856 #undef __aarch64_vgetq_lane_u32
25857 #undef __aarch64_vgetq_lane_u64
25859 #undef __aarch64_vdup_lane_any
25860 #undef __aarch64_vdup_lane_f32
25861 #undef __aarch64_vdup_lane_f64
25862 #undef __aarch64_vdup_lane_p8
25863 #undef __aarch64_vdup_lane_p16
25864 #undef __aarch64_vdup_lane_s8
25865 #undef __aarch64_vdup_lane_s16
25866 #undef __aarch64_vdup_lane_s32
25867 #undef __aarch64_vdup_lane_s64
25868 #undef __aarch64_vdup_lane_u8
25869 #undef __aarch64_vdup_lane_u16
25870 #undef __aarch64_vdup_lane_u32
25871 #undef __aarch64_vdup_lane_u64
25872 #undef __aarch64_vdup_laneq_f32
25873 #undef __aarch64_vdup_laneq_f64
25874 #undef __aarch64_vdup_laneq_p8
25875 #undef __aarch64_vdup_laneq_p16
25876 #undef __aarch64_vdup_laneq_s8
25877 #undef __aarch64_vdup_laneq_s16
25878 #undef __aarch64_vdup_laneq_s32
25879 #undef __aarch64_vdup_laneq_s64
25880 #undef __aarch64_vdup_laneq_u8
25881 #undef __aarch64_vdup_laneq_u16
25882 #undef __aarch64_vdup_laneq_u32
25883 #undef __aarch64_vdup_laneq_u64
25884 #undef __aarch64_vdupq_lane_f32
25885 #undef __aarch64_vdupq_lane_f64
25886 #undef __aarch64_vdupq_lane_p8
25887 #undef __aarch64_vdupq_lane_p16
25888 #undef __aarch64_vdupq_lane_s8
25889 #undef __aarch64_vdupq_lane_s16
25890 #undef __aarch64_vdupq_lane_s32
25891 #undef __aarch64_vdupq_lane_s64
25892 #undef __aarch64_vdupq_lane_u8
25893 #undef __aarch64_vdupq_lane_u16
25894 #undef __aarch64_vdupq_lane_u32
25895 #undef __aarch64_vdupq_lane_u64
25896 #undef __aarch64_vdupq_laneq_f32
25897 #undef __aarch64_vdupq_laneq_f64
25898 #undef __aarch64_vdupq_laneq_p8
25899 #undef __aarch64_vdupq_laneq_p16
25900 #undef __aarch64_vdupq_laneq_s8
25901 #undef __aarch64_vdupq_laneq_s16
25902 #undef __aarch64_vdupq_laneq_s32
25903 #undef __aarch64_vdupq_laneq_s64
25904 #undef __aarch64_vdupq_laneq_u8
25905 #undef __aarch64_vdupq_laneq_u16
25906 #undef __aarch64_vdupq_laneq_u32
25907 #undef __aarch64_vdupq_laneq_u64
25909 #endif