/* config/aarch64/arm_neon.h — from official-gcc.git,
   blob fa5766787e9b490292a835834db98a1c3a5556e4.  */
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef int64_t int64x1_t;
42 typedef int32_t int32x1_t;
43 typedef int16_t int16x1_t;
44 typedef int8_t int8x1_t;
45 typedef double float64x1_t;
46 typedef __builtin_aarch64_simd_sf float32x2_t
47 __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_aarch64_simd_poly8 poly8x8_t
49 __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_aarch64_simd_poly16 poly16x4_t
51 __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_aarch64_simd_uqi uint8x8_t
53 __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_aarch64_simd_uhi uint16x4_t
55 __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_aarch64_simd_usi uint32x2_t
57 __attribute__ ((__vector_size__ (8)));
58 typedef uint64_t uint64x1_t;
59 typedef uint32_t uint32x1_t;
60 typedef uint16_t uint16x1_t;
61 typedef uint8_t uint8x1_t;
62 typedef __builtin_aarch64_simd_qi int8x16_t
63 __attribute__ ((__vector_size__ (16)));
64 typedef __builtin_aarch64_simd_hi int16x8_t
65 __attribute__ ((__vector_size__ (16)));
66 typedef __builtin_aarch64_simd_si int32x4_t
67 __attribute__ ((__vector_size__ (16)));
68 typedef __builtin_aarch64_simd_di int64x2_t
69 __attribute__ ((__vector_size__ (16)));
70 typedef __builtin_aarch64_simd_sf float32x4_t
71 __attribute__ ((__vector_size__ (16)));
72 typedef __builtin_aarch64_simd_df float64x2_t
73 __attribute__ ((__vector_size__ (16)));
74 typedef __builtin_aarch64_simd_poly8 poly8x16_t
75 __attribute__ ((__vector_size__ (16)));
76 typedef __builtin_aarch64_simd_poly16 poly16x8_t
77 __attribute__ ((__vector_size__ (16)));
78 typedef __builtin_aarch64_simd_poly64 poly64x2_t
79 __attribute__ ((__vector_size__ (16)));
80 typedef __builtin_aarch64_simd_uqi uint8x16_t
81 __attribute__ ((__vector_size__ (16)));
82 typedef __builtin_aarch64_simd_uhi uint16x8_t
83 __attribute__ ((__vector_size__ (16)));
84 typedef __builtin_aarch64_simd_usi uint32x4_t
85 __attribute__ ((__vector_size__ (16)));
86 typedef __builtin_aarch64_simd_udi uint64x2_t
87 __attribute__ ((__vector_size__ (16)));
89 typedef float float32_t;
90 typedef double float64_t;
91 typedef __builtin_aarch64_simd_poly8 poly8_t;
92 typedef __builtin_aarch64_simd_poly16 poly16_t;
93 typedef __builtin_aarch64_simd_poly64 poly64_t;
94 typedef __builtin_aarch64_simd_poly128 poly128_t;
96 typedef struct int8x8x2_t
98 int8x8_t val[2];
99 } int8x8x2_t;
101 typedef struct int8x16x2_t
103 int8x16_t val[2];
104 } int8x16x2_t;
106 typedef struct int16x4x2_t
108 int16x4_t val[2];
109 } int16x4x2_t;
111 typedef struct int16x8x2_t
113 int16x8_t val[2];
114 } int16x8x2_t;
116 typedef struct int32x2x2_t
118 int32x2_t val[2];
119 } int32x2x2_t;
121 typedef struct int32x4x2_t
123 int32x4_t val[2];
124 } int32x4x2_t;
126 typedef struct int64x1x2_t
128 int64x1_t val[2];
129 } int64x1x2_t;
131 typedef struct int64x2x2_t
133 int64x2_t val[2];
134 } int64x2x2_t;
136 typedef struct uint8x8x2_t
138 uint8x8_t val[2];
139 } uint8x8x2_t;
141 typedef struct uint8x16x2_t
143 uint8x16_t val[2];
144 } uint8x16x2_t;
146 typedef struct uint16x4x2_t
148 uint16x4_t val[2];
149 } uint16x4x2_t;
151 typedef struct uint16x8x2_t
153 uint16x8_t val[2];
154 } uint16x8x2_t;
156 typedef struct uint32x2x2_t
158 uint32x2_t val[2];
159 } uint32x2x2_t;
161 typedef struct uint32x4x2_t
163 uint32x4_t val[2];
164 } uint32x4x2_t;
166 typedef struct uint64x1x2_t
168 uint64x1_t val[2];
169 } uint64x1x2_t;
171 typedef struct uint64x2x2_t
173 uint64x2_t val[2];
174 } uint64x2x2_t;
176 typedef struct float32x2x2_t
178 float32x2_t val[2];
179 } float32x2x2_t;
181 typedef struct float32x4x2_t
183 float32x4_t val[2];
184 } float32x4x2_t;
186 typedef struct float64x2x2_t
188 float64x2_t val[2];
189 } float64x2x2_t;
191 typedef struct float64x1x2_t
193 float64x1_t val[2];
194 } float64x1x2_t;
196 typedef struct poly8x8x2_t
198 poly8x8_t val[2];
199 } poly8x8x2_t;
201 typedef struct poly8x16x2_t
203 poly8x16_t val[2];
204 } poly8x16x2_t;
206 typedef struct poly16x4x2_t
208 poly16x4_t val[2];
209 } poly16x4x2_t;
211 typedef struct poly16x8x2_t
213 poly16x8_t val[2];
214 } poly16x8x2_t;
216 typedef struct int8x8x3_t
218 int8x8_t val[3];
219 } int8x8x3_t;
221 typedef struct int8x16x3_t
223 int8x16_t val[3];
224 } int8x16x3_t;
226 typedef struct int16x4x3_t
228 int16x4_t val[3];
229 } int16x4x3_t;
231 typedef struct int16x8x3_t
233 int16x8_t val[3];
234 } int16x8x3_t;
236 typedef struct int32x2x3_t
238 int32x2_t val[3];
239 } int32x2x3_t;
241 typedef struct int32x4x3_t
243 int32x4_t val[3];
244 } int32x4x3_t;
246 typedef struct int64x1x3_t
248 int64x1_t val[3];
249 } int64x1x3_t;
251 typedef struct int64x2x3_t
253 int64x2_t val[3];
254 } int64x2x3_t;
256 typedef struct uint8x8x3_t
258 uint8x8_t val[3];
259 } uint8x8x3_t;
261 typedef struct uint8x16x3_t
263 uint8x16_t val[3];
264 } uint8x16x3_t;
266 typedef struct uint16x4x3_t
268 uint16x4_t val[3];
269 } uint16x4x3_t;
271 typedef struct uint16x8x3_t
273 uint16x8_t val[3];
274 } uint16x8x3_t;
276 typedef struct uint32x2x3_t
278 uint32x2_t val[3];
279 } uint32x2x3_t;
281 typedef struct uint32x4x3_t
283 uint32x4_t val[3];
284 } uint32x4x3_t;
286 typedef struct uint64x1x3_t
288 uint64x1_t val[3];
289 } uint64x1x3_t;
291 typedef struct uint64x2x3_t
293 uint64x2_t val[3];
294 } uint64x2x3_t;
296 typedef struct float32x2x3_t
298 float32x2_t val[3];
299 } float32x2x3_t;
301 typedef struct float32x4x3_t
303 float32x4_t val[3];
304 } float32x4x3_t;
306 typedef struct float64x2x3_t
308 float64x2_t val[3];
309 } float64x2x3_t;
311 typedef struct float64x1x3_t
313 float64x1_t val[3];
314 } float64x1x3_t;
316 typedef struct poly8x8x3_t
318 poly8x8_t val[3];
319 } poly8x8x3_t;
321 typedef struct poly8x16x3_t
323 poly8x16_t val[3];
324 } poly8x16x3_t;
326 typedef struct poly16x4x3_t
328 poly16x4_t val[3];
329 } poly16x4x3_t;
331 typedef struct poly16x8x3_t
333 poly16x8_t val[3];
334 } poly16x8x3_t;
336 typedef struct int8x8x4_t
338 int8x8_t val[4];
339 } int8x8x4_t;
341 typedef struct int8x16x4_t
343 int8x16_t val[4];
344 } int8x16x4_t;
346 typedef struct int16x4x4_t
348 int16x4_t val[4];
349 } int16x4x4_t;
351 typedef struct int16x8x4_t
353 int16x8_t val[4];
354 } int16x8x4_t;
356 typedef struct int32x2x4_t
358 int32x2_t val[4];
359 } int32x2x4_t;
361 typedef struct int32x4x4_t
363 int32x4_t val[4];
364 } int32x4x4_t;
366 typedef struct int64x1x4_t
368 int64x1_t val[4];
369 } int64x1x4_t;
371 typedef struct int64x2x4_t
373 int64x2_t val[4];
374 } int64x2x4_t;
376 typedef struct uint8x8x4_t
378 uint8x8_t val[4];
379 } uint8x8x4_t;
381 typedef struct uint8x16x4_t
383 uint8x16_t val[4];
384 } uint8x16x4_t;
386 typedef struct uint16x4x4_t
388 uint16x4_t val[4];
389 } uint16x4x4_t;
391 typedef struct uint16x8x4_t
393 uint16x8_t val[4];
394 } uint16x8x4_t;
396 typedef struct uint32x2x4_t
398 uint32x2_t val[4];
399 } uint32x2x4_t;
401 typedef struct uint32x4x4_t
403 uint32x4_t val[4];
404 } uint32x4x4_t;
406 typedef struct uint64x1x4_t
408 uint64x1_t val[4];
409 } uint64x1x4_t;
411 typedef struct uint64x2x4_t
413 uint64x2_t val[4];
414 } uint64x2x4_t;
416 typedef struct float32x2x4_t
418 float32x2_t val[4];
419 } float32x2x4_t;
421 typedef struct float32x4x4_t
423 float32x4_t val[4];
424 } float32x4x4_t;
426 typedef struct float64x2x4_t
428 float64x2_t val[4];
429 } float64x2x4_t;
431 typedef struct float64x1x4_t
433 float64x1_t val[4];
434 } float64x1x4_t;
436 typedef struct poly8x8x4_t
438 poly8x8_t val[4];
439 } poly8x8x4_t;
441 typedef struct poly8x16x4_t
443 poly8x16_t val[4];
444 } poly8x16x4_t;
446 typedef struct poly16x4x4_t
448 poly16x4_t val[4];
449 } poly16x4x4_t;
451 typedef struct poly16x8x4_t
453 poly16x8_t val[4];
454 } poly16x8x4_t;
/* vget_lane internal macros.  The generic helper casts through a signed
   vector type (__cast_a) and casts the extracted element back (__cast_ret);
   the lane index is endianness-checked by the builtin.  */

#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret \
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
#define __aarch64_vget_lane_f64(__a, __b) (__a)

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) (__a)

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) (__a)

#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros.  The generic helper extracts lane
   __b (via the matching vget-lane macro, __q2 selects the q variant) and
   broadcasts it with the matching vdup_n (__q1 selects the q variant).  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) (__a)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) (__a)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) (__a)

/* __aarch64_vdup_laneq internal macros.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , q, __a, __b)

/* __aarch64_vdupq_lane internal macros.  Single-element sources are
   broadcast directly with vdupq_n.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, , __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))

/* __aarch64_vdupq_laneq internal macros.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, q, __a, __b)
616 /* vadd */
617 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
618 vadd_s8 (int8x8_t __a, int8x8_t __b)
620 return __a + __b;
623 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
624 vadd_s16 (int16x4_t __a, int16x4_t __b)
626 return __a + __b;
629 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
630 vadd_s32 (int32x2_t __a, int32x2_t __b)
632 return __a + __b;
635 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
636 vadd_f32 (float32x2_t __a, float32x2_t __b)
638 return __a + __b;
641 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
642 vadd_f64 (float64x1_t __a, float64x1_t __b)
644 return __a + __b;
647 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
648 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
650 return __a + __b;
653 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
654 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
656 return __a + __b;
659 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
660 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
662 return __a + __b;
665 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
666 vadd_s64 (int64x1_t __a, int64x1_t __b)
668 return __a + __b;
671 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
672 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
674 return __a + __b;
677 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
678 vaddq_s8 (int8x16_t __a, int8x16_t __b)
680 return __a + __b;
683 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
684 vaddq_s16 (int16x8_t __a, int16x8_t __b)
686 return __a + __b;
689 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
690 vaddq_s32 (int32x4_t __a, int32x4_t __b)
692 return __a + __b;
695 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
696 vaddq_s64 (int64x2_t __a, int64x2_t __b)
698 return __a + __b;
701 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
702 vaddq_f32 (float32x4_t __a, float32x4_t __b)
704 return __a + __b;
707 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
708 vaddq_f64 (float64x2_t __a, float64x2_t __b)
710 return __a + __b;
713 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
714 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
716 return __a + __b;
719 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
720 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
722 return __a + __b;
725 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
726 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
728 return __a + __b;
731 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
732 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
734 return __a + __b;
737 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
738 vaddl_s8 (int8x8_t __a, int8x8_t __b)
740 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
743 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
744 vaddl_s16 (int16x4_t __a, int16x4_t __b)
746 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
749 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
750 vaddl_s32 (int32x2_t __a, int32x2_t __b)
752 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
755 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
756 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
758 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
759 (int8x8_t) __b);
762 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
763 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
765 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
766 (int16x4_t) __b);
769 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
770 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
772 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
773 (int32x2_t) __b);
776 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
777 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
779 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
782 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
783 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
785 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
788 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
789 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
791 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
794 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
795 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
797 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
798 (int8x16_t) __b);
801 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
802 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
804 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
805 (int16x8_t) __b);
808 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
809 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
811 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
812 (int32x4_t) __b);
815 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
816 vaddw_s8 (int16x8_t __a, int8x8_t __b)
818 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
822 vaddw_s16 (int32x4_t __a, int16x4_t __b)
824 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
827 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
828 vaddw_s32 (int64x2_t __a, int32x2_t __b)
830 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
833 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
834 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
836 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
837 (int8x8_t) __b);
840 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
841 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
843 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
844 (int16x4_t) __b);
847 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
848 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
850 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
851 (int32x2_t) __b);
854 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
855 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
857 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
860 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
861 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
863 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
866 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
867 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
869 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
872 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
873 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
875 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
876 (int8x16_t) __b);
879 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
880 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
882 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
883 (int16x8_t) __b);
886 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
887 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
889 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
890 (int32x4_t) __b);
893 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
894 vhadd_s8 (int8x8_t __a, int8x8_t __b)
896 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
899 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
900 vhadd_s16 (int16x4_t __a, int16x4_t __b)
902 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
905 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
906 vhadd_s32 (int32x2_t __a, int32x2_t __b)
908 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
911 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
912 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
914 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
915 (int8x8_t) __b);
918 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
919 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
921 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
922 (int16x4_t) __b);
925 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
926 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
928 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
929 (int32x2_t) __b);
932 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
933 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
935 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
938 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
939 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
941 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
944 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
945 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
947 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
950 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
951 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
953 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
954 (int8x16_t) __b);
957 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
958 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
960 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
961 (int16x8_t) __b);
964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
965 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
967 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
968 (int32x4_t) __b);
971 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
972 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
974 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
977 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
978 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
980 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
983 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
984 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
986 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
989 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
990 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
992 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
993 (int8x8_t) __b);
996 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
997 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
999 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1000 (int16x4_t) __b);
1003 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1004 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1006 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1007 (int32x2_t) __b);
1010 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1011 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1013 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1016 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1017 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1019 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1022 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1023 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1025 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1028 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1029 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1031 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1032 (int8x16_t) __b);
1035 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1036 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1038 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1039 (int16x8_t) __b);
1042 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1043 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1045 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1046 (int32x4_t) __b);
1049 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1050 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1052 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1055 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1056 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1058 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1061 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1062 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1064 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1067 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1068 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1070 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1071 (int16x8_t) __b);
1074 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1075 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1077 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1078 (int32x4_t) __b);
1081 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1082 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1084 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1085 (int64x2_t) __b);
1088 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1089 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1091 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1094 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1095 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1097 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1100 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1101 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1103 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1106 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1107 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1109 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1110 (int16x8_t) __b);
1113 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1114 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1116 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1117 (int32x4_t) __b);
1120 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1121 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1123 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1124 (int64x2_t) __b);
1127 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1128 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1130 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1133 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1134 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1136 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1139 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1140 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1142 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1145 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1146 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1148 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1149 (int16x8_t) __b,
1150 (int16x8_t) __c);
1153 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1154 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1156 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1157 (int32x4_t) __b,
1158 (int32x4_t) __c);
1161 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1162 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1164 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1165 (int64x2_t) __b,
1166 (int64x2_t) __c);
1169 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1170 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1172 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1175 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1176 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1178 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1181 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1182 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1184 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1187 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1188 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1190 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1191 (int16x8_t) __b,
1192 (int16x8_t) __c);
1195 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1196 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1198 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1199 (int32x4_t) __b,
1200 (int32x4_t) __c);
1203 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1204 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1206 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1207 (int64x2_t) __b,
1208 (int64x2_t) __c);
1211 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1212 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1214 return __a / __b;
1217 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1218 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1220 return __a / __b;
1223 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1224 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1226 return __a / __b;
1229 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1230 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1232 return __a / __b;
1235 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1236 vmul_s8 (int8x8_t __a, int8x8_t __b)
1238 return __a * __b;
1241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1242 vmul_s16 (int16x4_t __a, int16x4_t __b)
1244 return __a * __b;
1247 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1248 vmul_s32 (int32x2_t __a, int32x2_t __b)
1250 return __a * __b;
1253 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1254 vmul_f32 (float32x2_t __a, float32x2_t __b)
1256 return __a * __b;
1259 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1260 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1262 return __a * __b;
1265 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1266 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1268 return __a * __b;
1271 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1272 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1274 return __a * __b;
1277 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1278 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1280 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1281 (int8x8_t) __b);
1284 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1285 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1287 return __a * __b;
1290 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1291 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1293 return __a * __b;
1296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1297 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1299 return __a * __b;
1302 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1303 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1305 return __a * __b;
1308 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1309 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1311 return __a * __b;
1314 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1315 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1317 return __a * __b;
1320 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1321 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1323 return __a * __b;
1326 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1327 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1329 return __a * __b;
1332 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1333 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1335 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1336 (int8x16_t) __b);
1339 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1340 vand_s8 (int8x8_t __a, int8x8_t __b)
1342 return __a & __b;
1345 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1346 vand_s16 (int16x4_t __a, int16x4_t __b)
1348 return __a & __b;
1351 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1352 vand_s32 (int32x2_t __a, int32x2_t __b)
1354 return __a & __b;
1357 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1358 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1360 return __a & __b;
1363 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1364 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1366 return __a & __b;
1369 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1370 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1372 return __a & __b;
1375 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1376 vand_s64 (int64x1_t __a, int64x1_t __b)
1378 return __a & __b;
1381 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1382 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1384 return __a & __b;
1387 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1388 vandq_s8 (int8x16_t __a, int8x16_t __b)
1390 return __a & __b;
1393 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1394 vandq_s16 (int16x8_t __a, int16x8_t __b)
1396 return __a & __b;
1399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1400 vandq_s32 (int32x4_t __a, int32x4_t __b)
1402 return __a & __b;
1405 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1406 vandq_s64 (int64x2_t __a, int64x2_t __b)
1408 return __a & __b;
1411 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1412 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1414 return __a & __b;
1417 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1418 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1420 return __a & __b;
1423 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1424 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1426 return __a & __b;
1429 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1430 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1432 return __a & __b;
1435 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1436 vorr_s8 (int8x8_t __a, int8x8_t __b)
1438 return __a | __b;
1441 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1442 vorr_s16 (int16x4_t __a, int16x4_t __b)
1444 return __a | __b;
1447 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1448 vorr_s32 (int32x2_t __a, int32x2_t __b)
1450 return __a | __b;
1453 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1454 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1456 return __a | __b;
1459 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1460 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1462 return __a | __b;
1465 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1466 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1468 return __a | __b;
1471 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1472 vorr_s64 (int64x1_t __a, int64x1_t __b)
1474 return __a | __b;
1477 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1478 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1480 return __a | __b;
1483 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1484 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1486 return __a | __b;
1489 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1490 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1492 return __a | __b;
1495 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1496 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1498 return __a | __b;
1501 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1502 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1504 return __a | __b;
1507 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1508 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1510 return __a | __b;
1513 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1514 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1516 return __a | __b;
1519 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1520 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1522 return __a | __b;
1525 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1526 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1528 return __a | __b;
1531 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1532 veor_s8 (int8x8_t __a, int8x8_t __b)
1534 return __a ^ __b;
1537 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1538 veor_s16 (int16x4_t __a, int16x4_t __b)
1540 return __a ^ __b;
1543 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1544 veor_s32 (int32x2_t __a, int32x2_t __b)
1546 return __a ^ __b;
1549 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1550 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1552 return __a ^ __b;
1555 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1556 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1558 return __a ^ __b;
1561 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1562 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1564 return __a ^ __b;
1567 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1568 veor_s64 (int64x1_t __a, int64x1_t __b)
1570 return __a ^ __b;
1573 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1574 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1576 return __a ^ __b;
1579 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1580 veorq_s8 (int8x16_t __a, int8x16_t __b)
1582 return __a ^ __b;
1585 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1586 veorq_s16 (int16x8_t __a, int16x8_t __b)
1588 return __a ^ __b;
1591 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1592 veorq_s32 (int32x4_t __a, int32x4_t __b)
1594 return __a ^ __b;
1597 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1598 veorq_s64 (int64x2_t __a, int64x2_t __b)
1600 return __a ^ __b;
1603 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1604 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1606 return __a ^ __b;
1609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1610 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1612 return __a ^ __b;
1615 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1616 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1618 return __a ^ __b;
1621 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1622 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1624 return __a ^ __b;
1627 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1628 vbic_s8 (int8x8_t __a, int8x8_t __b)
1630 return __a & ~__b;
1633 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1634 vbic_s16 (int16x4_t __a, int16x4_t __b)
1636 return __a & ~__b;
1639 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1640 vbic_s32 (int32x2_t __a, int32x2_t __b)
1642 return __a & ~__b;
1645 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1646 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1648 return __a & ~__b;
1651 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1652 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1654 return __a & ~__b;
1657 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1658 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1660 return __a & ~__b;
1663 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1664 vbic_s64 (int64x1_t __a, int64x1_t __b)
1666 return __a & ~__b;
1669 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1670 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1672 return __a & ~__b;
1675 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1676 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1678 return __a & ~__b;
1681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1682 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1684 return __a & ~__b;
1687 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1688 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1690 return __a & ~__b;
1693 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1694 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1696 return __a & ~__b;
1699 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1700 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1702 return __a & ~__b;
1705 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1706 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1708 return __a & ~__b;
1711 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1712 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1714 return __a & ~__b;
1717 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1718 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1720 return __a & ~__b;
1723 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1724 vorn_s8 (int8x8_t __a, int8x8_t __b)
1726 return __a | ~__b;
1729 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1730 vorn_s16 (int16x4_t __a, int16x4_t __b)
1732 return __a | ~__b;
1735 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1736 vorn_s32 (int32x2_t __a, int32x2_t __b)
1738 return __a | ~__b;
1741 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1742 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1744 return __a | ~__b;
1747 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1748 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1750 return __a | ~__b;
1753 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1754 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1756 return __a | ~__b;
1759 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1760 vorn_s64 (int64x1_t __a, int64x1_t __b)
1762 return __a | ~__b;
1765 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1766 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1768 return __a | ~__b;
1771 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1772 vornq_s8 (int8x16_t __a, int8x16_t __b)
1774 return __a | ~__b;
1777 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1778 vornq_s16 (int16x8_t __a, int16x8_t __b)
1780 return __a | ~__b;
1783 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1784 vornq_s32 (int32x4_t __a, int32x4_t __b)
1786 return __a | ~__b;
1789 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1790 vornq_s64 (int64x2_t __a, int64x2_t __b)
1792 return __a | ~__b;
1795 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1796 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1798 return __a | ~__b;
1801 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1802 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1804 return __a | ~__b;
1807 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1808 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1810 return __a | ~__b;
1813 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1814 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1816 return __a | ~__b;
1819 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1820 vsub_s8 (int8x8_t __a, int8x8_t __b)
1822 return __a - __b;
1825 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1826 vsub_s16 (int16x4_t __a, int16x4_t __b)
1828 return __a - __b;
1831 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1832 vsub_s32 (int32x2_t __a, int32x2_t __b)
1834 return __a - __b;
1837 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1838 vsub_f32 (float32x2_t __a, float32x2_t __b)
1840 return __a - __b;
1843 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1844 vsub_f64 (float64x1_t __a, float64x1_t __b)
1846 return __a - __b;
1849 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1850 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1852 return __a - __b;
1855 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1856 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1858 return __a - __b;
1861 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1862 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1864 return __a - __b;
1867 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1868 vsub_s64 (int64x1_t __a, int64x1_t __b)
1870 return __a - __b;
1873 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1874 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1876 return __a - __b;
1879 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1880 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1882 return __a - __b;
1885 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1886 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1888 return __a - __b;
1891 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1892 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1894 return __a - __b;
1897 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1898 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1900 return __a - __b;
1903 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1904 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1906 return __a - __b;
1909 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1910 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1912 return __a - __b;
1915 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1916 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1918 return __a - __b;
1921 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1922 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1924 return __a - __b;
1927 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1928 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1930 return __a - __b;
1933 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1934 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1936 return __a - __b;
1939 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1940 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1942 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1945 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1946 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1948 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1951 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1952 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1954 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1957 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1958 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1960 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1961 (int8x8_t) __b);
1964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1965 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1967 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1968 (int16x4_t) __b);
1971 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1972 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1974 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1975 (int32x2_t) __b);
1978 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1979 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1981 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1984 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1985 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1987 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1990 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1991 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1993 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1996 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1997 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1999 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2000 (int8x16_t) __b);
2003 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2004 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2006 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2007 (int16x8_t) __b);
2010 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2011 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2013 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2014 (int32x4_t) __b);
2017 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2018 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2020 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2023 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2024 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2026 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2029 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2030 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2032 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2035 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2036 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2038 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2039 (int8x8_t) __b);
2042 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2043 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2045 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2046 (int16x4_t) __b);
2049 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2050 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2052 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2053 (int32x2_t) __b);
2056 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2057 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2059 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2062 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2063 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2065 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2068 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2069 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2071 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2074 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2075 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2077 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2078 (int8x16_t) __b);
2081 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2082 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2084 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2085 (int16x8_t) __b);
2088 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2089 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2091 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2092 (int32x4_t) __b);
2095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2096 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2098 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2101 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2102 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2104 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2107 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2108 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2110 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2113 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2114 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2116 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
2119 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2120 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2122 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
2123 (int8x8_t) __b);
2126 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2127 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2129 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
2130 (int16x4_t) __b);
2133 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2134 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2136 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
2137 (int32x2_t) __b);
2140 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2141 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2143 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
2144 (int64x1_t) __b);
2147 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2148 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2150 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2153 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2154 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2156 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2159 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2160 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2162 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2165 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2166 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2168 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2171 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2172 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2174 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
2175 (int8x16_t) __b);
2178 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2179 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2181 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
2182 (int16x8_t) __b);
2185 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2186 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2188 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2189 (int32x4_t) __b);
2192 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2193 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2195 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2196 (int64x2_t) __b);
2199 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2200 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2202 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2205 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2206 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2208 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2211 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2212 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2214 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2217 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2218 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2220 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2223 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2224 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2226 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2227 (int8x8_t) __b);
2230 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2231 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2233 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2234 (int16x4_t) __b);
2237 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2238 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2240 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2241 (int32x2_t) __b);
2244 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2245 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2247 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2248 (int64x1_t) __b);
2251 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2252 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2254 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2257 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2258 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2260 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2263 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2264 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2266 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2269 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2270 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2272 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2275 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2276 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2278 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2279 (int8x16_t) __b);
2282 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2283 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2285 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2286 (int16x8_t) __b);
2289 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2290 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2292 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2293 (int32x4_t) __b);
2296 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2297 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2299 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2300 (int64x2_t) __b);
2303 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2304 vqneg_s8 (int8x8_t __a)
2306 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2309 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2310 vqneg_s16 (int16x4_t __a)
2312 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2315 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2316 vqneg_s32 (int32x2_t __a)
2318 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2321 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2322 vqneg_s64 (int64x1_t __a)
2324 return __builtin_aarch64_sqnegdi (__a);
2327 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2328 vqnegq_s8 (int8x16_t __a)
2330 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2333 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2334 vqnegq_s16 (int16x8_t __a)
2336 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2339 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2340 vqnegq_s32 (int32x4_t __a)
2342 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2345 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2346 vqabs_s8 (int8x8_t __a)
2348 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2351 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2352 vqabs_s16 (int16x4_t __a)
2354 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2357 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2358 vqabs_s32 (int32x2_t __a)
2360 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2363 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2364 vqabs_s64 (int64x1_t __a)
2366 return __builtin_aarch64_sqabsdi (__a);
2369 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2370 vqabsq_s8 (int8x16_t __a)
2372 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2375 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2376 vqabsq_s16 (int16x8_t __a)
2378 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2381 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2382 vqabsq_s32 (int32x4_t __a)
2384 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2387 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2388 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2390 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2393 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2394 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2396 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2399 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2400 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2402 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2405 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2406 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2408 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2411 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2412 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2414 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2417 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2418 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2420 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2423 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2424 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2426 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2429 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2430 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2432 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2435 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2436 vcreate_s8 (uint64_t __a)
2438 return (int8x8_t) __a;
2441 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2442 vcreate_s16 (uint64_t __a)
2444 return (int16x4_t) __a;
2447 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2448 vcreate_s32 (uint64_t __a)
2450 return (int32x2_t) __a;
2453 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2454 vcreate_s64 (uint64_t __a)
2456 return (int64x1_t) __a;
2459 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2460 vcreate_f32 (uint64_t __a)
2462 return (float32x2_t) __a;
2465 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2466 vcreate_u8 (uint64_t __a)
2468 return (uint8x8_t) __a;
2471 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2472 vcreate_u16 (uint64_t __a)
2474 return (uint16x4_t) __a;
2477 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2478 vcreate_u32 (uint64_t __a)
2480 return (uint32x2_t) __a;
2483 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2484 vcreate_u64 (uint64_t __a)
2486 return (uint64x1_t) __a;
2489 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2490 vcreate_f64 (uint64_t __a)
2492 return (float64x1_t) __builtin_aarch64_createdf (__a);
2495 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2496 vcreate_p8 (uint64_t __a)
2498 return (poly8x8_t) __a;
2501 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2502 vcreate_p16 (uint64_t __a)
2504 return (poly16x4_t) __a;
2507 /* vget_lane */
2509 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2510 vget_lane_f32 (float32x2_t __a, const int __b)
2512 return __aarch64_vget_lane_f32 (__a, __b);
2515 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2516 vget_lane_f64 (float64x1_t __a, const int __b)
2518 return __aarch64_vget_lane_f64 (__a, __b);
2521 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2522 vget_lane_p8 (poly8x8_t __a, const int __b)
2524 return __aarch64_vget_lane_p8 (__a, __b);
2527 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2528 vget_lane_p16 (poly16x4_t __a, const int __b)
2530 return __aarch64_vget_lane_p16 (__a, __b);
2533 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2534 vget_lane_s8 (int8x8_t __a, const int __b)
2536 return __aarch64_vget_lane_s8 (__a, __b);
2539 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2540 vget_lane_s16 (int16x4_t __a, const int __b)
2542 return __aarch64_vget_lane_s16 (__a, __b);
2545 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2546 vget_lane_s32 (int32x2_t __a, const int __b)
2548 return __aarch64_vget_lane_s32 (__a, __b);
2551 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2552 vget_lane_s64 (int64x1_t __a, const int __b)
2554 return __aarch64_vget_lane_s64 (__a, __b);
2557 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2558 vget_lane_u8 (uint8x8_t __a, const int __b)
2560 return __aarch64_vget_lane_u8 (__a, __b);
2563 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2564 vget_lane_u16 (uint16x4_t __a, const int __b)
2566 return __aarch64_vget_lane_u16 (__a, __b);
2569 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2570 vget_lane_u32 (uint32x2_t __a, const int __b)
2572 return __aarch64_vget_lane_u32 (__a, __b);
2575 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2576 vget_lane_u64 (uint64x1_t __a, const int __b)
2578 return __aarch64_vget_lane_u64 (__a, __b);
2581 /* vgetq_lane */
2583 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2584 vgetq_lane_f32 (float32x4_t __a, const int __b)
2586 return __aarch64_vgetq_lane_f32 (__a, __b);
2589 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2590 vgetq_lane_f64 (float64x2_t __a, const int __b)
2592 return __aarch64_vgetq_lane_f64 (__a, __b);
2595 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2596 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2598 return __aarch64_vgetq_lane_p8 (__a, __b);
2601 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2602 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2604 return __aarch64_vgetq_lane_p16 (__a, __b);
2607 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2608 vgetq_lane_s8 (int8x16_t __a, const int __b)
2610 return __aarch64_vgetq_lane_s8 (__a, __b);
2613 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2614 vgetq_lane_s16 (int16x8_t __a, const int __b)
2616 return __aarch64_vgetq_lane_s16 (__a, __b);
2619 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2620 vgetq_lane_s32 (int32x4_t __a, const int __b)
2622 return __aarch64_vgetq_lane_s32 (__a, __b);
2625 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2626 vgetq_lane_s64 (int64x2_t __a, const int __b)
2628 return __aarch64_vgetq_lane_s64 (__a, __b);
2631 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2632 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2634 return __aarch64_vgetq_lane_u8 (__a, __b);
2637 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2638 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2640 return __aarch64_vgetq_lane_u16 (__a, __b);
2643 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2644 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2646 return __aarch64_vgetq_lane_u32 (__a, __b);
2649 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2650 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2652 return __aarch64_vgetq_lane_u64 (__a, __b);
2655 /* vreinterpret */
2657 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2658 vreinterpret_p8_f64 (float64x1_t __a)
2660 return __builtin_aarch64_reinterpretv8qidf_ps (__a);
2663 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2664 vreinterpret_p8_s8 (int8x8_t __a)
2666 return (poly8x8_t) __a;
2669 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2670 vreinterpret_p8_s16 (int16x4_t __a)
2672 return (poly8x8_t) __a;
2675 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2676 vreinterpret_p8_s32 (int32x2_t __a)
2678 return (poly8x8_t) __a;
2681 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2682 vreinterpret_p8_s64 (int64x1_t __a)
2684 return (poly8x8_t) __a;
2687 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2688 vreinterpret_p8_f32 (float32x2_t __a)
2690 return (poly8x8_t) __a;
2693 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2694 vreinterpret_p8_u8 (uint8x8_t __a)
2696 return (poly8x8_t) __a;
2699 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2700 vreinterpret_p8_u16 (uint16x4_t __a)
2702 return (poly8x8_t) __a;
2705 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2706 vreinterpret_p8_u32 (uint32x2_t __a)
2708 return (poly8x8_t) __a;
2711 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2712 vreinterpret_p8_u64 (uint64x1_t __a)
2714 return (poly8x8_t) __a;
2717 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2718 vreinterpret_p8_p16 (poly16x4_t __a)
2720 return (poly8x8_t) __a;
2723 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2724 vreinterpretq_p8_f64 (float64x2_t __a)
2726 return (poly8x16_t) __a;
2729 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2730 vreinterpretq_p8_s8 (int8x16_t __a)
2732 return (poly8x16_t) __a;
2735 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2736 vreinterpretq_p8_s16 (int16x8_t __a)
2738 return (poly8x16_t) __a;
2741 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2742 vreinterpretq_p8_s32 (int32x4_t __a)
2744 return (poly8x16_t) __a;
2747 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2748 vreinterpretq_p8_s64 (int64x2_t __a)
2750 return (poly8x16_t) __a;
2753 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2754 vreinterpretq_p8_f32 (float32x4_t __a)
2756 return (poly8x16_t) __a;
2759 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2760 vreinterpretq_p8_u8 (uint8x16_t __a)
2762 return (poly8x16_t) __a;
2765 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2766 vreinterpretq_p8_u16 (uint16x8_t __a)
2768 return (poly8x16_t) __a;
2771 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2772 vreinterpretq_p8_u32 (uint32x4_t __a)
2774 return (poly8x16_t) __a;
2777 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2778 vreinterpretq_p8_u64 (uint64x2_t __a)
2780 return (poly8x16_t) __a;
2783 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2784 vreinterpretq_p8_p16 (poly16x8_t __a)
2786 return (poly8x16_t) __a;
2789 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2790 vreinterpret_p16_f64 (float64x1_t __a)
2792 return __builtin_aarch64_reinterpretv4hidf_ps (__a);
2795 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2796 vreinterpret_p16_s8 (int8x8_t __a)
2798 return (poly16x4_t) __a;
2801 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2802 vreinterpret_p16_s16 (int16x4_t __a)
2804 return (poly16x4_t) __a;
2807 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2808 vreinterpret_p16_s32 (int32x2_t __a)
2810 return (poly16x4_t) __a;
2813 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2814 vreinterpret_p16_s64 (int64x1_t __a)
2816 return (poly16x4_t) __a;
2819 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2820 vreinterpret_p16_f32 (float32x2_t __a)
2822 return (poly16x4_t) __a;
2825 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2826 vreinterpret_p16_u8 (uint8x8_t __a)
2828 return (poly16x4_t) __a;
2831 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2832 vreinterpret_p16_u16 (uint16x4_t __a)
2834 return (poly16x4_t) __a;
2837 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2838 vreinterpret_p16_u32 (uint32x2_t __a)
2840 return (poly16x4_t) __a;
2843 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2844 vreinterpret_p16_u64 (uint64x1_t __a)
2846 return (poly16x4_t) __a;
2849 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2850 vreinterpret_p16_p8 (poly8x8_t __a)
2852 return (poly16x4_t) __a;
2855 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2856 vreinterpretq_p16_f64 (float64x2_t __a)
2858 return (poly16x8_t) __a;
2861 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2862 vreinterpretq_p16_s8 (int8x16_t __a)
2864 return (poly16x8_t) __a;
2867 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2868 vreinterpretq_p16_s16 (int16x8_t __a)
2870 return (poly16x8_t) __a;
2873 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2874 vreinterpretq_p16_s32 (int32x4_t __a)
2876 return (poly16x8_t) __a;
2879 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2880 vreinterpretq_p16_s64 (int64x2_t __a)
2882 return (poly16x8_t) __a;
2885 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2886 vreinterpretq_p16_f32 (float32x4_t __a)
2888 return (poly16x8_t) __a;
2891 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2892 vreinterpretq_p16_u8 (uint8x16_t __a)
2894 return (poly16x8_t) __a;
2897 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2898 vreinterpretq_p16_u16 (uint16x8_t __a)
2900 return (poly16x8_t) __a;
2903 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2904 vreinterpretq_p16_u32 (uint32x4_t __a)
2906 return (poly16x8_t) __a;
2909 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2910 vreinterpretq_p16_u64 (uint64x2_t __a)
2912 return (poly16x8_t) __a;
2915 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2916 vreinterpretq_p16_p8 (poly8x16_t __a)
2918 return (poly16x8_t) __a;
2921 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2922 vreinterpret_f32_f64 (float64x1_t __a)
2924 return __builtin_aarch64_reinterpretv2sfdf (__a);
2927 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2928 vreinterpret_f32_s8 (int8x8_t __a)
2930 return (float32x2_t) __a;
2933 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2934 vreinterpret_f32_s16 (int16x4_t __a)
2936 return (float32x2_t) __a;
2939 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2940 vreinterpret_f32_s32 (int32x2_t __a)
2942 return (float32x2_t) __a;
2945 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2946 vreinterpret_f32_s64 (int64x1_t __a)
2948 return (float32x2_t) __a;
2951 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2952 vreinterpret_f32_u8 (uint8x8_t __a)
2954 return (float32x2_t) __a;
2957 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2958 vreinterpret_f32_u16 (uint16x4_t __a)
2960 return (float32x2_t) __a;
2963 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2964 vreinterpret_f32_u32 (uint32x2_t __a)
2966 return (float32x2_t) __a;
2969 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2970 vreinterpret_f32_u64 (uint64x1_t __a)
2972 return (float32x2_t) __a;
2975 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2976 vreinterpret_f32_p8 (poly8x8_t __a)
2978 return (float32x2_t) __a;
2981 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2982 vreinterpret_f32_p16 (poly16x4_t __a)
2984 return (float32x2_t) __a;
2987 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2988 vreinterpretq_f32_f64 (float64x2_t __a)
2990 return (float32x4_t) __a;
2993 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2994 vreinterpretq_f32_s8 (int8x16_t __a)
2996 return (float32x4_t) __a;
2999 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3000 vreinterpretq_f32_s16 (int16x8_t __a)
3002 return (float32x4_t) __a;
3005 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3006 vreinterpretq_f32_s32 (int32x4_t __a)
3008 return (float32x4_t) __a;
3011 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3012 vreinterpretq_f32_s64 (int64x2_t __a)
3014 return (float32x4_t) __a;
3017 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3018 vreinterpretq_f32_u8 (uint8x16_t __a)
3020 return (float32x4_t) __a;
3023 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3024 vreinterpretq_f32_u16 (uint16x8_t __a)
3026 return (float32x4_t) __a;
3029 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3030 vreinterpretq_f32_u32 (uint32x4_t __a)
3032 return (float32x4_t) __a;
3035 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3036 vreinterpretq_f32_u64 (uint64x2_t __a)
3038 return (float32x4_t) __a;
3041 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3042 vreinterpretq_f32_p8 (poly8x16_t __a)
3044 return (float32x4_t) __a;
3047 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3048 vreinterpretq_f32_p16 (poly16x8_t __a)
3050 return (float32x4_t) __a;
3053 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3054 vreinterpret_f64_f32 (float32x2_t __a)
3056 return __builtin_aarch64_reinterpretdfv2sf (__a);
3059 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3060 vreinterpret_f64_p8 (poly8x8_t __a)
3062 return __builtin_aarch64_reinterpretdfv8qi_sp (__a);
3065 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3066 vreinterpret_f64_p16 (poly16x4_t __a)
3068 return __builtin_aarch64_reinterpretdfv4hi_sp (__a);
3071 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3072 vreinterpret_f64_s8 (int8x8_t __a)
3074 return __builtin_aarch64_reinterpretdfv8qi (__a);
3077 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3078 vreinterpret_f64_s16 (int16x4_t __a)
3080 return __builtin_aarch64_reinterpretdfv4hi (__a);
3083 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3084 vreinterpret_f64_s32 (int32x2_t __a)
3086 return __builtin_aarch64_reinterpretdfv2si (__a);
3089 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3090 vreinterpret_f64_s64 (int64x1_t __a)
3092 return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0));
3095 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3096 vreinterpret_f64_u8 (uint8x8_t __a)
3098 return __builtin_aarch64_reinterpretdfv8qi_su (__a);
3101 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3102 vreinterpret_f64_u16 (uint16x4_t __a)
3104 return __builtin_aarch64_reinterpretdfv4hi_su (__a);
3107 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3108 vreinterpret_f64_u32 (uint32x2_t __a)
3110 return __builtin_aarch64_reinterpretdfv2si_su (__a);
3113 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3114 vreinterpret_f64_u64 (uint64x1_t __a)
3116 return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0));
3119 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3120 vreinterpretq_f64_f32 (float32x4_t __a)
3122 return (float64x2_t) __a;
3125 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3126 vreinterpretq_f64_p8 (poly8x16_t __a)
3128 return (float64x2_t) __a;
3131 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3132 vreinterpretq_f64_p16 (poly16x8_t __a)
3134 return (float64x2_t) __a;
3137 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3138 vreinterpretq_f64_s8 (int8x16_t __a)
3140 return (float64x2_t) __a;
3143 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3144 vreinterpretq_f64_s16 (int16x8_t __a)
3146 return (float64x2_t) __a;
3149 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3150 vreinterpretq_f64_s32 (int32x4_t __a)
3152 return (float64x2_t) __a;
3155 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3156 vreinterpretq_f64_s64 (int64x2_t __a)
3158 return (float64x2_t) __a;
3161 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3162 vreinterpretq_f64_u8 (uint8x16_t __a)
3164 return (float64x2_t) __a;
3167 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3168 vreinterpretq_f64_u16 (uint16x8_t __a)
3170 return (float64x2_t) __a;
3173 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3174 vreinterpretq_f64_u32 (uint32x4_t __a)
3176 return (float64x2_t) __a;
3179 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3180 vreinterpretq_f64_u64 (uint64x2_t __a)
3182 return (float64x2_t) __a;
3185 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3186 vreinterpret_s64_f64 (float64x1_t __a)
3188 return __builtin_aarch64_reinterpretdidf (__a);
3191 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3192 vreinterpret_s64_s8 (int8x8_t __a)
3194 return (int64x1_t) __a;
3197 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3198 vreinterpret_s64_s16 (int16x4_t __a)
3200 return (int64x1_t) __a;
3203 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3204 vreinterpret_s64_s32 (int32x2_t __a)
3206 return (int64x1_t) __a;
3209 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3210 vreinterpret_s64_f32 (float32x2_t __a)
3212 return (int64x1_t) __a;
3215 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3216 vreinterpret_s64_u8 (uint8x8_t __a)
3218 return (int64x1_t) __a;
3221 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3222 vreinterpret_s64_u16 (uint16x4_t __a)
3224 return (int64x1_t) __a;
3227 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3228 vreinterpret_s64_u32 (uint32x2_t __a)
3230 return (int64x1_t) __a;
3233 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3234 vreinterpret_s64_u64 (uint64x1_t __a)
3236 return (int64x1_t) __a;
3239 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3240 vreinterpret_s64_p8 (poly8x8_t __a)
3242 return (int64x1_t) __a;
3245 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3246 vreinterpret_s64_p16 (poly16x4_t __a)
3248 return (int64x1_t) __a;
3251 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3252 vreinterpretq_s64_f64 (float64x2_t __a)
3254 return (int64x2_t) __a;
3257 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3258 vreinterpretq_s64_s8 (int8x16_t __a)
3260 return (int64x2_t) __a;
3263 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3264 vreinterpretq_s64_s16 (int16x8_t __a)
3266 return (int64x2_t) __a;
3269 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3270 vreinterpretq_s64_s32 (int32x4_t __a)
3272 return (int64x2_t) __a;
3275 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3276 vreinterpretq_s64_f32 (float32x4_t __a)
3278 return (int64x2_t) __a;
3281 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3282 vreinterpretq_s64_u8 (uint8x16_t __a)
3284 return (int64x2_t) __a;
3287 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3288 vreinterpretq_s64_u16 (uint16x8_t __a)
3290 return (int64x2_t) __a;
3293 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3294 vreinterpretq_s64_u32 (uint32x4_t __a)
3296 return (int64x2_t) __a;
3299 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3300 vreinterpretq_s64_u64 (uint64x2_t __a)
3302 return (int64x2_t) __a;
3305 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3306 vreinterpretq_s64_p8 (poly8x16_t __a)
3308 return (int64x2_t) __a;
3311 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3312 vreinterpretq_s64_p16 (poly16x8_t __a)
3314 return (int64x2_t) __a;
3317 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3318 vreinterpret_u64_f64 (float64x1_t __a)
3320 return __builtin_aarch64_reinterpretdidf_us (__a);
3323 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3324 vreinterpret_u64_s8 (int8x8_t __a)
3326 return (uint64x1_t) __a;
3329 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3330 vreinterpret_u64_s16 (int16x4_t __a)
3332 return (uint64x1_t) __a;
3335 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3336 vreinterpret_u64_s32 (int32x2_t __a)
3338 return (uint64x1_t) __a;
3341 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3342 vreinterpret_u64_s64 (int64x1_t __a)
3344 return (uint64x1_t) __a;
3347 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3348 vreinterpret_u64_f32 (float32x2_t __a)
3350 return (uint64x1_t) __a;
3353 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3354 vreinterpret_u64_u8 (uint8x8_t __a)
3356 return (uint64x1_t) __a;
3359 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3360 vreinterpret_u64_u16 (uint16x4_t __a)
3362 return (uint64x1_t) __a;
3365 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3366 vreinterpret_u64_u32 (uint32x2_t __a)
3368 return (uint64x1_t) __a;
3371 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3372 vreinterpret_u64_p8 (poly8x8_t __a)
3374 return (uint64x1_t) __a;
3377 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3378 vreinterpret_u64_p16 (poly16x4_t __a)
3380 return (uint64x1_t) __a;
3383 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3384 vreinterpretq_u64_f64 (float64x2_t __a)
3386 return (uint64x2_t) __a;
3389 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3390 vreinterpretq_u64_s8 (int8x16_t __a)
3392 return (uint64x2_t) __a;
3395 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3396 vreinterpretq_u64_s16 (int16x8_t __a)
3398 return (uint64x2_t) __a;
3401 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3402 vreinterpretq_u64_s32 (int32x4_t __a)
3404 return (uint64x2_t) __a;
3407 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3408 vreinterpretq_u64_s64 (int64x2_t __a)
3410 return (uint64x2_t) __a;
3413 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3414 vreinterpretq_u64_f32 (float32x4_t __a)
3416 return (uint64x2_t) __a;
3419 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3420 vreinterpretq_u64_u8 (uint8x16_t __a)
3422 return (uint64x2_t) __a;
3425 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3426 vreinterpretq_u64_u16 (uint16x8_t __a)
3428 return (uint64x2_t) __a;
3431 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3432 vreinterpretq_u64_u32 (uint32x4_t __a)
3434 return (uint64x2_t) __a;
3437 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3438 vreinterpretq_u64_p8 (poly8x16_t __a)
3440 return (uint64x2_t) __a;
3443 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3444 vreinterpretq_u64_p16 (poly16x8_t __a)
3446 return (uint64x2_t) __a;
3449 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3450 vreinterpret_s8_f64 (float64x1_t __a)
3452 return __builtin_aarch64_reinterpretv8qidf (__a);
3455 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3456 vreinterpret_s8_s16 (int16x4_t __a)
3458 return (int8x8_t) __a;
3461 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3462 vreinterpret_s8_s32 (int32x2_t __a)
3464 return (int8x8_t) __a;
3467 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3468 vreinterpret_s8_s64 (int64x1_t __a)
3470 return (int8x8_t) __a;
3473 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3474 vreinterpret_s8_f32 (float32x2_t __a)
3476 return (int8x8_t) __a;
3479 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3480 vreinterpret_s8_u8 (uint8x8_t __a)
3482 return (int8x8_t) __a;
3485 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3486 vreinterpret_s8_u16 (uint16x4_t __a)
3488 return (int8x8_t) __a;
3491 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3492 vreinterpret_s8_u32 (uint32x2_t __a)
3494 return (int8x8_t) __a;
3497 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3498 vreinterpret_s8_u64 (uint64x1_t __a)
3500 return (int8x8_t) __a;
3503 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3504 vreinterpret_s8_p8 (poly8x8_t __a)
3506 return (int8x8_t) __a;
3509 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3510 vreinterpret_s8_p16 (poly16x4_t __a)
3512 return (int8x8_t) __a;
3515 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3516 vreinterpretq_s8_f64 (float64x2_t __a)
3518 return (int8x16_t) __a;
3521 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3522 vreinterpretq_s8_s16 (int16x8_t __a)
3524 return (int8x16_t) __a;
3527 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3528 vreinterpretq_s8_s32 (int32x4_t __a)
3530 return (int8x16_t) __a;
3533 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3534 vreinterpretq_s8_s64 (int64x2_t __a)
3536 return (int8x16_t) __a;
3539 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3540 vreinterpretq_s8_f32 (float32x4_t __a)
3542 return (int8x16_t) __a;
3545 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3546 vreinterpretq_s8_u8 (uint8x16_t __a)
3548 return (int8x16_t) __a;
3551 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3552 vreinterpretq_s8_u16 (uint16x8_t __a)
3554 return (int8x16_t) __a;
3557 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3558 vreinterpretq_s8_u32 (uint32x4_t __a)
3560 return (int8x16_t) __a;
3563 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3564 vreinterpretq_s8_u64 (uint64x2_t __a)
3566 return (int8x16_t) __a;
3569 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3570 vreinterpretq_s8_p8 (poly8x16_t __a)
3572 return (int8x16_t) __a;
3575 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3576 vreinterpretq_s8_p16 (poly16x8_t __a)
3578 return (int8x16_t) __a;
3581 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3582 vreinterpret_s16_f64 (float64x1_t __a)
3584 return __builtin_aarch64_reinterpretv4hidf (__a);
3587 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3588 vreinterpret_s16_s8 (int8x8_t __a)
3590 return (int16x4_t) __a;
3593 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3594 vreinterpret_s16_s32 (int32x2_t __a)
3596 return (int16x4_t) __a;
3599 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3600 vreinterpret_s16_s64 (int64x1_t __a)
3602 return (int16x4_t) __a;
3605 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3606 vreinterpret_s16_f32 (float32x2_t __a)
3608 return (int16x4_t) __a;
3611 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3612 vreinterpret_s16_u8 (uint8x8_t __a)
3614 return (int16x4_t) __a;
3617 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3618 vreinterpret_s16_u16 (uint16x4_t __a)
3620 return (int16x4_t) __a;
3623 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3624 vreinterpret_s16_u32 (uint32x2_t __a)
3626 return (int16x4_t) __a;
3629 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3630 vreinterpret_s16_u64 (uint64x1_t __a)
3632 return (int16x4_t) __a;
3635 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3636 vreinterpret_s16_p8 (poly8x8_t __a)
3638 return (int16x4_t) __a;
3641 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3642 vreinterpret_s16_p16 (poly16x4_t __a)
3644 return (int16x4_t) __a;
3647 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3648 vreinterpretq_s16_f64 (float64x2_t __a)
3650 return (int16x8_t) __a;
3653 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3654 vreinterpretq_s16_s8 (int8x16_t __a)
3656 return (int16x8_t) __a;
3659 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3660 vreinterpretq_s16_s32 (int32x4_t __a)
3662 return (int16x8_t) __a;
3665 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3666 vreinterpretq_s16_s64 (int64x2_t __a)
3668 return (int16x8_t) __a;
3671 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3672 vreinterpretq_s16_f32 (float32x4_t __a)
3674 return (int16x8_t) __a;
3677 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3678 vreinterpretq_s16_u8 (uint8x16_t __a)
3680 return (int16x8_t) __a;
3683 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3684 vreinterpretq_s16_u16 (uint16x8_t __a)
3686 return (int16x8_t) __a;
3689 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3690 vreinterpretq_s16_u32 (uint32x4_t __a)
3692 return (int16x8_t) __a;
3695 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3696 vreinterpretq_s16_u64 (uint64x2_t __a)
3698 return (int16x8_t) __a;
3701 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3702 vreinterpretq_s16_p8 (poly8x16_t __a)
3704 return (int16x8_t) __a;
3707 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3708 vreinterpretq_s16_p16 (poly16x8_t __a)
3710 return (int16x8_t) __a;
3713 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3714 vreinterpret_s32_f64 (float64x1_t __a)
3716 return __builtin_aarch64_reinterpretv2sidf (__a);
3719 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3720 vreinterpret_s32_s8 (int8x8_t __a)
3722 return (int32x2_t) __a;
3725 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3726 vreinterpret_s32_s16 (int16x4_t __a)
3728 return (int32x2_t) __a;
3731 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3732 vreinterpret_s32_s64 (int64x1_t __a)
3734 return (int32x2_t) __a;
3737 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3738 vreinterpret_s32_f32 (float32x2_t __a)
3740 return (int32x2_t) __a;
3743 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3744 vreinterpret_s32_u8 (uint8x8_t __a)
3746 return (int32x2_t) __a;
3749 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3750 vreinterpret_s32_u16 (uint16x4_t __a)
3752 return (int32x2_t) __a;
3755 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3756 vreinterpret_s32_u32 (uint32x2_t __a)
3758 return (int32x2_t) __a;
3761 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3762 vreinterpret_s32_u64 (uint64x1_t __a)
3764 return (int32x2_t) __a;
3767 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3768 vreinterpret_s32_p8 (poly8x8_t __a)
3770 return (int32x2_t) __a;
3773 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3774 vreinterpret_s32_p16 (poly16x4_t __a)
3776 return (int32x2_t) __a;
3779 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3780 vreinterpretq_s32_f64 (float64x2_t __a)
3782 return (int32x4_t) __a;
3785 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3786 vreinterpretq_s32_s8 (int8x16_t __a)
3788 return (int32x4_t) __a;
3791 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3792 vreinterpretq_s32_s16 (int16x8_t __a)
3794 return (int32x4_t) __a;
3797 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3798 vreinterpretq_s32_s64 (int64x2_t __a)
3800 return (int32x4_t) __a;
3803 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3804 vreinterpretq_s32_f32 (float32x4_t __a)
3806 return (int32x4_t) __a;
3809 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3810 vreinterpretq_s32_u8 (uint8x16_t __a)
3812 return (int32x4_t) __a;
3815 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3816 vreinterpretq_s32_u16 (uint16x8_t __a)
3818 return (int32x4_t) __a;
3821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3822 vreinterpretq_s32_u32 (uint32x4_t __a)
3824 return (int32x4_t) __a;
3827 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3828 vreinterpretq_s32_u64 (uint64x2_t __a)
3830 return (int32x4_t) __a;
3833 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3834 vreinterpretq_s32_p8 (poly8x16_t __a)
3836 return (int32x4_t) __a;
3839 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3840 vreinterpretq_s32_p16 (poly16x8_t __a)
3842 return (int32x4_t) __a;
3845 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3846 vreinterpret_u8_f64 (float64x1_t __a)
3848 return __builtin_aarch64_reinterpretv8qidf_us (__a);
3851 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3852 vreinterpret_u8_s8 (int8x8_t __a)
3854 return (uint8x8_t) __a;
3857 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3858 vreinterpret_u8_s16 (int16x4_t __a)
3860 return (uint8x8_t) __a;
3863 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3864 vreinterpret_u8_s32 (int32x2_t __a)
3866 return (uint8x8_t) __a;
3869 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3870 vreinterpret_u8_s64 (int64x1_t __a)
3872 return (uint8x8_t) __a;
3875 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3876 vreinterpret_u8_f32 (float32x2_t __a)
3878 return (uint8x8_t) __a;
3881 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3882 vreinterpret_u8_u16 (uint16x4_t __a)
3884 return (uint8x8_t) __a;
3887 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3888 vreinterpret_u8_u32 (uint32x2_t __a)
3890 return (uint8x8_t) __a;
3893 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3894 vreinterpret_u8_u64 (uint64x1_t __a)
3896 return (uint8x8_t) __a;
3899 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3900 vreinterpret_u8_p8 (poly8x8_t __a)
3902 return (uint8x8_t) __a;
3905 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3906 vreinterpret_u8_p16 (poly16x4_t __a)
3908 return (uint8x8_t) __a;
3911 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3912 vreinterpretq_u8_f64 (float64x2_t __a)
3914 return (uint8x16_t) __a;
3917 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3918 vreinterpretq_u8_s8 (int8x16_t __a)
3920 return (uint8x16_t) __a;
3923 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3924 vreinterpretq_u8_s16 (int16x8_t __a)
3926 return (uint8x16_t) __a;
3929 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3930 vreinterpretq_u8_s32 (int32x4_t __a)
3932 return (uint8x16_t) __a;
3935 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3936 vreinterpretq_u8_s64 (int64x2_t __a)
3938 return (uint8x16_t) __a;
3941 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3942 vreinterpretq_u8_f32 (float32x4_t __a)
3944 return (uint8x16_t) __a;
3947 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3948 vreinterpretq_u8_u16 (uint16x8_t __a)
3950 return (uint8x16_t) __a;
3953 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3954 vreinterpretq_u8_u32 (uint32x4_t __a)
3956 return (uint8x16_t) __a;
3959 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3960 vreinterpretq_u8_u64 (uint64x2_t __a)
3962 return (uint8x16_t) __a;
3965 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3966 vreinterpretq_u8_p8 (poly8x16_t __a)
3968 return (uint8x16_t) __a;
3971 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3972 vreinterpretq_u8_p16 (poly16x8_t __a)
3974 return (uint8x16_t) __a;
3977 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3978 vreinterpret_u16_f64 (float64x1_t __a)
3980 return __builtin_aarch64_reinterpretv4hidf_us (__a);
3983 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3984 vreinterpret_u16_s8 (int8x8_t __a)
3986 return (uint16x4_t) __a;
3989 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3990 vreinterpret_u16_s16 (int16x4_t __a)
3992 return (uint16x4_t) __a;
3995 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3996 vreinterpret_u16_s32 (int32x2_t __a)
3998 return (uint16x4_t) __a;
4001 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4002 vreinterpret_u16_s64 (int64x1_t __a)
4004 return (uint16x4_t) __a;
4007 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4008 vreinterpret_u16_f32 (float32x2_t __a)
4010 return (uint16x4_t) __a;
4013 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4014 vreinterpret_u16_u8 (uint8x8_t __a)
4016 return (uint16x4_t) __a;
4019 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4020 vreinterpret_u16_u32 (uint32x2_t __a)
4022 return (uint16x4_t) __a;
4025 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4026 vreinterpret_u16_u64 (uint64x1_t __a)
4028 return (uint16x4_t) __a;
4031 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4032 vreinterpret_u16_p8 (poly8x8_t __a)
4034 return (uint16x4_t) __a;
4037 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4038 vreinterpret_u16_p16 (poly16x4_t __a)
4040 return (uint16x4_t) __a;
4043 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4044 vreinterpretq_u16_f64 (float64x2_t __a)
4046 return (uint16x8_t) __a;
4049 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4050 vreinterpretq_u16_s8 (int8x16_t __a)
4052 return (uint16x8_t) __a;
4055 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4056 vreinterpretq_u16_s16 (int16x8_t __a)
4058 return (uint16x8_t) __a;
4061 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4062 vreinterpretq_u16_s32 (int32x4_t __a)
4064 return (uint16x8_t) __a;
4067 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4068 vreinterpretq_u16_s64 (int64x2_t __a)
4070 return (uint16x8_t) __a;
4073 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4074 vreinterpretq_u16_f32 (float32x4_t __a)
4076 return (uint16x8_t) __a;
4079 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4080 vreinterpretq_u16_u8 (uint8x16_t __a)
4082 return (uint16x8_t) __a;
4085 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4086 vreinterpretq_u16_u32 (uint32x4_t __a)
4088 return (uint16x8_t) __a;
4091 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4092 vreinterpretq_u16_u64 (uint64x2_t __a)
4094 return (uint16x8_t) __a;
4097 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4098 vreinterpretq_u16_p8 (poly8x16_t __a)
4100 return (uint16x8_t) __a;
4103 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4104 vreinterpretq_u16_p16 (poly16x8_t __a)
4106 return (uint16x8_t) __a;
4109 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4110 vreinterpret_u32_f64 (float64x1_t __a)
4112 return __builtin_aarch64_reinterpretv2sidf_us (__a);
4115 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4116 vreinterpret_u32_s8 (int8x8_t __a)
4118 return (uint32x2_t) __a;
4121 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4122 vreinterpret_u32_s16 (int16x4_t __a)
4124 return (uint32x2_t) __a;
4127 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4128 vreinterpret_u32_s32 (int32x2_t __a)
4130 return (uint32x2_t) __a;
4133 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4134 vreinterpret_u32_s64 (int64x1_t __a)
4136 return (uint32x2_t) __a;
4139 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4140 vreinterpret_u32_f32 (float32x2_t __a)
4142 return (uint32x2_t) __a;
4145 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4146 vreinterpret_u32_u8 (uint8x8_t __a)
4148 return (uint32x2_t) __a;
4151 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4152 vreinterpret_u32_u16 (uint16x4_t __a)
4154 return (uint32x2_t) __a;
4157 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4158 vreinterpret_u32_u64 (uint64x1_t __a)
4160 return (uint32x2_t) __a;
4163 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4164 vreinterpret_u32_p8 (poly8x8_t __a)
4166 return (uint32x2_t) __a;
4169 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4170 vreinterpret_u32_p16 (poly16x4_t __a)
4172 return (uint32x2_t) __a;
4175 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4176 vreinterpretq_u32_f64 (float64x2_t __a)
4178 return (uint32x4_t) __a;
4181 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4182 vreinterpretq_u32_s8 (int8x16_t __a)
4184 return (uint32x4_t) __a;
4187 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4188 vreinterpretq_u32_s16 (int16x8_t __a)
4190 return (uint32x4_t) __a;
4193 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4194 vreinterpretq_u32_s32 (int32x4_t __a)
4196 return (uint32x4_t) __a;
4199 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4200 vreinterpretq_u32_s64 (int64x2_t __a)
4202 return (uint32x4_t) __a;
4205 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4206 vreinterpretq_u32_f32 (float32x4_t __a)
4208 return (uint32x4_t) __a;
4211 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4212 vreinterpretq_u32_u8 (uint8x16_t __a)
4214 return (uint32x4_t) __a;
4217 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4218 vreinterpretq_u32_u16 (uint16x8_t __a)
4220 return (uint32x4_t) __a;
4223 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4224 vreinterpretq_u32_u64 (uint64x2_t __a)
4226 return (uint32x4_t) __a;
4229 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4230 vreinterpretq_u32_p8 (poly8x16_t __a)
4232 return (uint32x4_t) __a;
4235 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4236 vreinterpretq_u32_p16 (poly16x8_t __a)
4238 return (uint32x4_t) __a;
4241 #define __GET_LOW(__TYPE) \
4242 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
4243 uint64_t lo = vgetq_lane_u64 (tmp, 0); \
4244 return vreinterpret_##__TYPE##_u64 (lo);
4246 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4247 vget_low_f32 (float32x4_t __a)
4249 __GET_LOW (f32);
4252 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4253 vget_low_f64 (float64x2_t __a)
4255 return vgetq_lane_f64 (__a, 0);
4258 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4259 vget_low_p8 (poly8x16_t __a)
4261 __GET_LOW (p8);
4264 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4265 vget_low_p16 (poly16x8_t __a)
4267 __GET_LOW (p16);
4270 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4271 vget_low_s8 (int8x16_t __a)
4273 __GET_LOW (s8);
4276 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4277 vget_low_s16 (int16x8_t __a)
4279 __GET_LOW (s16);
4282 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4283 vget_low_s32 (int32x4_t __a)
4285 __GET_LOW (s32);
4288 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4289 vget_low_s64 (int64x2_t __a)
4291 return vgetq_lane_s64 (__a, 0);
4294 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4295 vget_low_u8 (uint8x16_t __a)
4297 __GET_LOW (u8);
4300 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4301 vget_low_u16 (uint16x8_t __a)
4303 __GET_LOW (u16);
4306 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4307 vget_low_u32 (uint32x4_t __a)
4309 __GET_LOW (u32);
4312 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4313 vget_low_u64 (uint64x2_t __a)
4315 return vgetq_lane_u64 (__a, 0);
4318 #undef __GET_LOW
4320 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4321 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4323 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4326 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4327 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4329 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4332 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4333 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4335 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4338 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4339 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4341 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
4344 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4345 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4347 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4350 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4351 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4353 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4354 (int8x8_t) __b);
4357 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4358 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4360 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4361 (int16x4_t) __b);
4364 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4365 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4367 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4368 (int32x2_t) __b);
4371 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4372 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4374 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
4375 (int64x1_t) __b);
4378 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4379 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4381 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
4384 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4385 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4387 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4388 (int8x8_t) __b);
4391 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4392 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4394 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4395 (int16x4_t) __b);
4398 /* Start of temporary inline asm implementations. */
4400 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4401 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4403 int8x8_t result;
4404 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4405 : "=w"(result)
4406 : "0"(a), "w"(b), "w"(c)
4407 : /* No clobbers */);
4408 return result;
4411 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4412 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4414 int16x4_t result;
4415 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4416 : "=w"(result)
4417 : "0"(a), "w"(b), "w"(c)
4418 : /* No clobbers */);
4419 return result;
4422 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4423 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4425 int32x2_t result;
4426 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4427 : "=w"(result)
4428 : "0"(a), "w"(b), "w"(c)
4429 : /* No clobbers */);
4430 return result;
4433 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4434 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4436 uint8x8_t result;
4437 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4438 : "=w"(result)
4439 : "0"(a), "w"(b), "w"(c)
4440 : /* No clobbers */);
4441 return result;
4444 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4445 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4447 uint16x4_t result;
4448 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4449 : "=w"(result)
4450 : "0"(a), "w"(b), "w"(c)
4451 : /* No clobbers */);
4452 return result;
4455 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4456 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4458 uint32x2_t result;
4459 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4460 : "=w"(result)
4461 : "0"(a), "w"(b), "w"(c)
4462 : /* No clobbers */);
4463 return result;
4466 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4467 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4469 int16x8_t result;
4470 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4471 : "=w"(result)
4472 : "0"(a), "w"(b), "w"(c)
4473 : /* No clobbers */);
4474 return result;
4477 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4478 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4480 int32x4_t result;
4481 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4482 : "=w"(result)
4483 : "0"(a), "w"(b), "w"(c)
4484 : /* No clobbers */);
4485 return result;
4488 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4489 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4491 int64x2_t result;
4492 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4493 : "=w"(result)
4494 : "0"(a), "w"(b), "w"(c)
4495 : /* No clobbers */);
4496 return result;
4499 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4500 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4502 uint16x8_t result;
4503 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4504 : "=w"(result)
4505 : "0"(a), "w"(b), "w"(c)
4506 : /* No clobbers */);
4507 return result;
4510 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4511 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4513 uint32x4_t result;
4514 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4515 : "=w"(result)
4516 : "0"(a), "w"(b), "w"(c)
4517 : /* No clobbers */);
4518 return result;
4521 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4522 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4524 uint64x2_t result;
4525 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4526 : "=w"(result)
4527 : "0"(a), "w"(b), "w"(c)
4528 : /* No clobbers */);
4529 return result;
4532 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4533 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4535 int16x8_t result;
4536 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4537 : "=w"(result)
4538 : "0"(a), "w"(b), "w"(c)
4539 : /* No clobbers */);
4540 return result;
4543 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4544 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4546 int32x4_t result;
4547 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4548 : "=w"(result)
4549 : "0"(a), "w"(b), "w"(c)
4550 : /* No clobbers */);
4551 return result;
4554 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4555 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4557 int64x2_t result;
4558 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4559 : "=w"(result)
4560 : "0"(a), "w"(b), "w"(c)
4561 : /* No clobbers */);
4562 return result;
4565 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4566 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4568 uint16x8_t result;
4569 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4570 : "=w"(result)
4571 : "0"(a), "w"(b), "w"(c)
4572 : /* No clobbers */);
4573 return result;
4576 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4577 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4579 uint32x4_t result;
4580 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4581 : "=w"(result)
4582 : "0"(a), "w"(b), "w"(c)
4583 : /* No clobbers */);
4584 return result;
4587 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4588 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4590 uint64x2_t result;
4591 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4592 : "=w"(result)
4593 : "0"(a), "w"(b), "w"(c)
4594 : /* No clobbers */);
4595 return result;
4598 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4599 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4601 int8x16_t result;
4602 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4603 : "=w"(result)
4604 : "0"(a), "w"(b), "w"(c)
4605 : /* No clobbers */);
4606 return result;
4609 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4610 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4612 int16x8_t result;
4613 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4614 : "=w"(result)
4615 : "0"(a), "w"(b), "w"(c)
4616 : /* No clobbers */);
4617 return result;
4620 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4621 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4623 int32x4_t result;
4624 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4625 : "=w"(result)
4626 : "0"(a), "w"(b), "w"(c)
4627 : /* No clobbers */);
4628 return result;
4631 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4632 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4634 uint8x16_t result;
4635 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4636 : "=w"(result)
4637 : "0"(a), "w"(b), "w"(c)
4638 : /* No clobbers */);
4639 return result;
4642 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4643 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4645 uint16x8_t result;
4646 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4647 : "=w"(result)
4648 : "0"(a), "w"(b), "w"(c)
4649 : /* No clobbers */);
4650 return result;
4653 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4654 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4656 uint32x4_t result;
4657 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4658 : "=w"(result)
4659 : "0"(a), "w"(b), "w"(c)
4660 : /* No clobbers */);
4661 return result;
4664 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4665 vabd_f32 (float32x2_t a, float32x2_t b)
4667 float32x2_t result;
4668 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4669 : "=w"(result)
4670 : "w"(a), "w"(b)
4671 : /* No clobbers */);
4672 return result;
4675 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4676 vabd_s8 (int8x8_t a, int8x8_t b)
4678 int8x8_t result;
4679 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4680 : "=w"(result)
4681 : "w"(a), "w"(b)
4682 : /* No clobbers */);
4683 return result;
4686 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4687 vabd_s16 (int16x4_t a, int16x4_t b)
4689 int16x4_t result;
4690 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4691 : "=w"(result)
4692 : "w"(a), "w"(b)
4693 : /* No clobbers */);
4694 return result;
4697 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4698 vabd_s32 (int32x2_t a, int32x2_t b)
4700 int32x2_t result;
4701 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4702 : "=w"(result)
4703 : "w"(a), "w"(b)
4704 : /* No clobbers */);
4705 return result;
4708 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4709 vabd_u8 (uint8x8_t a, uint8x8_t b)
4711 uint8x8_t result;
4712 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4713 : "=w"(result)
4714 : "w"(a), "w"(b)
4715 : /* No clobbers */);
4716 return result;
4719 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4720 vabd_u16 (uint16x4_t a, uint16x4_t b)
4722 uint16x4_t result;
4723 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4724 : "=w"(result)
4725 : "w"(a), "w"(b)
4726 : /* No clobbers */);
4727 return result;
4730 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4731 vabd_u32 (uint32x2_t a, uint32x2_t b)
4733 uint32x2_t result;
4734 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4735 : "=w"(result)
4736 : "w"(a), "w"(b)
4737 : /* No clobbers */);
4738 return result;
4741 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4742 vabdd_f64 (float64_t a, float64_t b)
4744 float64_t result;
4745 __asm__ ("fabd %d0, %d1, %d2"
4746 : "=w"(result)
4747 : "w"(a), "w"(b)
4748 : /* No clobbers */);
4749 return result;
4752 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4753 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4755 int16x8_t result;
4756 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4757 : "=w"(result)
4758 : "w"(a), "w"(b)
4759 : /* No clobbers */);
4760 return result;
4763 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4764 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4766 int32x4_t result;
4767 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4768 : "=w"(result)
4769 : "w"(a), "w"(b)
4770 : /* No clobbers */);
4771 return result;
4774 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4775 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4777 int64x2_t result;
4778 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4779 : "=w"(result)
4780 : "w"(a), "w"(b)
4781 : /* No clobbers */);
4782 return result;
4785 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4786 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4788 uint16x8_t result;
4789 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4790 : "=w"(result)
4791 : "w"(a), "w"(b)
4792 : /* No clobbers */);
4793 return result;
4796 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4797 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4799 uint32x4_t result;
4800 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4801 : "=w"(result)
4802 : "w"(a), "w"(b)
4803 : /* No clobbers */);
4804 return result;
4807 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4808 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4810 uint64x2_t result;
4811 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4812 : "=w"(result)
4813 : "w"(a), "w"(b)
4814 : /* No clobbers */);
4815 return result;
4818 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4819 vabdl_s8 (int8x8_t a, int8x8_t b)
4821 int16x8_t result;
4822 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4823 : "=w"(result)
4824 : "w"(a), "w"(b)
4825 : /* No clobbers */);
4826 return result;
4829 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4830 vabdl_s16 (int16x4_t a, int16x4_t b)
4832 int32x4_t result;
4833 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4834 : "=w"(result)
4835 : "w"(a), "w"(b)
4836 : /* No clobbers */);
4837 return result;
4840 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4841 vabdl_s32 (int32x2_t a, int32x2_t b)
4843 int64x2_t result;
4844 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4845 : "=w"(result)
4846 : "w"(a), "w"(b)
4847 : /* No clobbers */);
4848 return result;
4851 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4852 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4854 uint16x8_t result;
4855 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4856 : "=w"(result)
4857 : "w"(a), "w"(b)
4858 : /* No clobbers */);
4859 return result;
4862 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4863 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4865 uint32x4_t result;
4866 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4867 : "=w"(result)
4868 : "w"(a), "w"(b)
4869 : /* No clobbers */);
4870 return result;
4873 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4874 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4876 uint64x2_t result;
4877 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4878 : "=w"(result)
4879 : "w"(a), "w"(b)
4880 : /* No clobbers */);
4881 return result;
4884 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4885 vabdq_f32 (float32x4_t a, float32x4_t b)
4887 float32x4_t result;
4888 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4889 : "=w"(result)
4890 : "w"(a), "w"(b)
4891 : /* No clobbers */);
4892 return result;
4895 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4896 vabdq_f64 (float64x2_t a, float64x2_t b)
4898 float64x2_t result;
4899 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4900 : "=w"(result)
4901 : "w"(a), "w"(b)
4902 : /* No clobbers */);
4903 return result;
4906 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4907 vabdq_s8 (int8x16_t a, int8x16_t b)
4909 int8x16_t result;
4910 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4911 : "=w"(result)
4912 : "w"(a), "w"(b)
4913 : /* No clobbers */);
4914 return result;
4917 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4918 vabdq_s16 (int16x8_t a, int16x8_t b)
4920 int16x8_t result;
4921 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4922 : "=w"(result)
4923 : "w"(a), "w"(b)
4924 : /* No clobbers */);
4925 return result;
4928 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4929 vabdq_s32 (int32x4_t a, int32x4_t b)
4931 int32x4_t result;
4932 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4933 : "=w"(result)
4934 : "w"(a), "w"(b)
4935 : /* No clobbers */);
4936 return result;
4939 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4940 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4942 uint8x16_t result;
4943 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4944 : "=w"(result)
4945 : "w"(a), "w"(b)
4946 : /* No clobbers */);
4947 return result;
4950 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4951 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4953 uint16x8_t result;
4954 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4955 : "=w"(result)
4956 : "w"(a), "w"(b)
4957 : /* No clobbers */);
4958 return result;
4961 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4962 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4964 uint32x4_t result;
4965 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4966 : "=w"(result)
4967 : "w"(a), "w"(b)
4968 : /* No clobbers */);
4969 return result;
4972 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4973 vabds_f32 (float32_t a, float32_t b)
4975 float32_t result;
4976 __asm__ ("fabd %s0, %s1, %s2"
4977 : "=w"(result)
4978 : "w"(a), "w"(b)
4979 : /* No clobbers */);
4980 return result;
4983 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4984 vaddlv_s8 (int8x8_t a)
4986 int16_t result;
4987 __asm__ ("saddlv %h0,%1.8b"
4988 : "=w"(result)
4989 : "w"(a)
4990 : /* No clobbers */);
4991 return result;
4994 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4995 vaddlv_s16 (int16x4_t a)
4997 int32_t result;
4998 __asm__ ("saddlv %s0,%1.4h"
4999 : "=w"(result)
5000 : "w"(a)
5001 : /* No clobbers */);
5002 return result;
5005 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5006 vaddlv_u8 (uint8x8_t a)
5008 uint16_t result;
5009 __asm__ ("uaddlv %h0,%1.8b"
5010 : "=w"(result)
5011 : "w"(a)
5012 : /* No clobbers */);
5013 return result;
5016 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5017 vaddlv_u16 (uint16x4_t a)
5019 uint32_t result;
5020 __asm__ ("uaddlv %s0,%1.4h"
5021 : "=w"(result)
5022 : "w"(a)
5023 : /* No clobbers */);
5024 return result;
5027 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5028 vaddlvq_s8 (int8x16_t a)
5030 int16_t result;
5031 __asm__ ("saddlv %h0,%1.16b"
5032 : "=w"(result)
5033 : "w"(a)
5034 : /* No clobbers */);
5035 return result;
5038 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5039 vaddlvq_s16 (int16x8_t a)
5041 int32_t result;
5042 __asm__ ("saddlv %s0,%1.8h"
5043 : "=w"(result)
5044 : "w"(a)
5045 : /* No clobbers */);
5046 return result;
5049 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5050 vaddlvq_s32 (int32x4_t a)
5052 int64_t result;
5053 __asm__ ("saddlv %d0,%1.4s"
5054 : "=w"(result)
5055 : "w"(a)
5056 : /* No clobbers */);
5057 return result;
5060 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5061 vaddlvq_u8 (uint8x16_t a)
5063 uint16_t result;
5064 __asm__ ("uaddlv %h0,%1.16b"
5065 : "=w"(result)
5066 : "w"(a)
5067 : /* No clobbers */);
5068 return result;
5071 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5072 vaddlvq_u16 (uint16x8_t a)
5074 uint32_t result;
5075 __asm__ ("uaddlv %s0,%1.8h"
5076 : "=w"(result)
5077 : "w"(a)
5078 : /* No clobbers */);
5079 return result;
5082 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5083 vaddlvq_u32 (uint32x4_t a)
5085 uint64_t result;
5086 __asm__ ("uaddlv %d0,%1.4s"
5087 : "=w"(result)
5088 : "w"(a)
5089 : /* No clobbers */);
5090 return result;
5093 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5094 vcls_s8 (int8x8_t a)
5096 int8x8_t result;
5097 __asm__ ("cls %0.8b,%1.8b"
5098 : "=w"(result)
5099 : "w"(a)
5100 : /* No clobbers */);
5101 return result;
5104 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5105 vcls_s16 (int16x4_t a)
5107 int16x4_t result;
5108 __asm__ ("cls %0.4h,%1.4h"
5109 : "=w"(result)
5110 : "w"(a)
5111 : /* No clobbers */);
5112 return result;
5115 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5116 vcls_s32 (int32x2_t a)
5118 int32x2_t result;
5119 __asm__ ("cls %0.2s,%1.2s"
5120 : "=w"(result)
5121 : "w"(a)
5122 : /* No clobbers */);
5123 return result;
5126 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5127 vclsq_s8 (int8x16_t a)
5129 int8x16_t result;
5130 __asm__ ("cls %0.16b,%1.16b"
5131 : "=w"(result)
5132 : "w"(a)
5133 : /* No clobbers */);
5134 return result;
5137 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5138 vclsq_s16 (int16x8_t a)
5140 int16x8_t result;
5141 __asm__ ("cls %0.8h,%1.8h"
5142 : "=w"(result)
5143 : "w"(a)
5144 : /* No clobbers */);
5145 return result;
5148 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5149 vclsq_s32 (int32x4_t a)
5151 int32x4_t result;
5152 __asm__ ("cls %0.4s,%1.4s"
5153 : "=w"(result)
5154 : "w"(a)
5155 : /* No clobbers */);
5156 return result;
5159 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5160 vcnt_p8 (poly8x8_t a)
5162 poly8x8_t result;
5163 __asm__ ("cnt %0.8b,%1.8b"
5164 : "=w"(result)
5165 : "w"(a)
5166 : /* No clobbers */);
5167 return result;
5170 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5171 vcnt_s8 (int8x8_t a)
5173 int8x8_t result;
5174 __asm__ ("cnt %0.8b,%1.8b"
5175 : "=w"(result)
5176 : "w"(a)
5177 : /* No clobbers */);
5178 return result;
5181 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5182 vcnt_u8 (uint8x8_t a)
5184 uint8x8_t result;
5185 __asm__ ("cnt %0.8b,%1.8b"
5186 : "=w"(result)
5187 : "w"(a)
5188 : /* No clobbers */);
5189 return result;
5192 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5193 vcntq_p8 (poly8x16_t a)
5195 poly8x16_t result;
5196 __asm__ ("cnt %0.16b,%1.16b"
5197 : "=w"(result)
5198 : "w"(a)
5199 : /* No clobbers */);
5200 return result;
5203 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5204 vcntq_s8 (int8x16_t a)
5206 int8x16_t result;
5207 __asm__ ("cnt %0.16b,%1.16b"
5208 : "=w"(result)
5209 : "w"(a)
5210 : /* No clobbers */);
5211 return result;
5214 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5215 vcntq_u8 (uint8x16_t a)
5217 uint8x16_t result;
5218 __asm__ ("cnt %0.16b,%1.16b"
5219 : "=w"(result)
5220 : "w"(a)
5221 : /* No clobbers */);
5222 return result;
/* vcopyq_lane_* : copy lane D of vector C into lane B of vector A
   (INS).  Implemented as macros because the lane indices must be
   immediate ("i") operands.  Statement expressions evaluate each
   argument exactly once.  */

#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vcvt_f16_f32 not supported */

/* vcvt_f32_f16 not supported */

/* vcvt_high_f16_f32 not supported */

/* vcvt_high_f32_f16 not supported */
5389 static float32x2_t vdup_n_f32 (float32_t);
/* vcvt*_n_* : fixed-point <-> floating-point conversions with an
   immediate fractional-bit count B (SCVTF/UCVTF/FCVTZS/FCVTZU, #fbits
   form).  Macros because B must be an immediate ("i") operand.  */

#define vcvt_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("scvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64_t a_ = (a); \
       float64_t result; \
       __asm__ ("scvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64_t a_ = (a); \
       float64_t result; \
       __asm__ ("ucvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       int64_t result; \
       __asm__ ("fcvtzs %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       uint64_t result; \
       __asm__ ("fcvtzu %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5631 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5632 vcvtx_f32_f64 (float64x2_t a)
5634 float32x2_t result;
5635 __asm__ ("fcvtxn %0.2s,%1.2d"
5636 : "=w"(result)
5637 : "w"(a)
5638 : /* No clobbers */);
5639 return result;
5642 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5643 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5645 float32x4_t result;
5646 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5647 : "=w"(result)
5648 : "w" (b), "0"(a)
5649 : /* No clobbers */);
5650 return result;
5653 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5654 vcvtxd_f32_f64 (float64_t a)
5656 float32_t result;
5657 __asm__ ("fcvtxn %s0,%d1"
5658 : "=w"(result)
5659 : "w"(a)
5660 : /* No clobbers */);
5661 return result;
/* vext_* : vector extract (EXT) on 64-bit vectors — concatenates A:B
   and extracts a contiguous vector starting at element C.  EXT takes a
   byte offset, so the immediate is scaled by the element size
   (#%3*2, #%3*4, #%3*8); byte-element variants pass C directly.
   Macros because the index must be an immediate ("i") operand.  */

#define vext_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64x1_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vextq_* : vector extract (EXT) on 128-bit vectors; see the vext_*
   comment above for how the immediate byte offset is scaled.  */

#define vextq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
5976 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5977 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5979 float32x2_t result;
5980 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5981 : "=w"(result)
5982 : "0"(a), "w"(b), "w"(c)
5983 : /* No clobbers */);
5984 return result;
5987 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5988 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5990 float32x4_t result;
5991 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5992 : "=w"(result)
5993 : "0"(a), "w"(b), "w"(c)
5994 : /* No clobbers */);
5995 return result;
5998 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5999 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6001 float64x2_t result;
6002 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
6003 : "=w"(result)
6004 : "0"(a), "w"(b), "w"(c)
6005 : /* No clobbers */);
6006 return result;
6009 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6010 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6012 float32x2_t result;
6013 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
6014 : "=w"(result)
6015 : "0"(a), "w"(b), "w"(c)
6016 : /* No clobbers */);
6017 return result;
6020 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6021 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6023 float32x4_t result;
6024 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
6025 : "=w"(result)
6026 : "0"(a), "w"(b), "w"(c)
6027 : /* No clobbers */);
6028 return result;
6031 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6032 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
6034 float64x2_t result;
6035 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
6036 : "=w"(result)
6037 : "0"(a), "w"(b), "w"(c)
6038 : /* No clobbers */);
6039 return result;
6042 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6043 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6045 float32x2_t result;
6046 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
6047 : "=w"(result)
6048 : "0"(a), "w"(b), "w"(c)
6049 : /* No clobbers */);
6050 return result;
6053 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6054 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6056 float32x4_t result;
6057 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
6058 : "=w"(result)
6059 : "0"(a), "w"(b), "w"(c)
6060 : /* No clobbers */);
6061 return result;
6064 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6065 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6067 float64x2_t result;
6068 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
6069 : "=w"(result)
6070 : "0"(a), "w"(b), "w"(c)
6071 : /* No clobbers */);
6072 return result;
6075 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6076 vget_high_f32 (float32x4_t a)
6078 float32x2_t result;
6079 __asm__ ("ins %0.d[0], %1.d[1]"
6080 : "=w"(result)
6081 : "w"(a)
6082 : /* No clobbers */);
6083 return result;
6086 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6087 vget_high_f64 (float64x2_t a)
6089 float64x1_t result;
6090 __asm__ ("ins %0.d[0], %1.d[1]"
6091 : "=w"(result)
6092 : "w"(a)
6093 : /* No clobbers */);
6094 return result;
6097 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6098 vget_high_p8 (poly8x16_t a)
6100 poly8x8_t result;
6101 __asm__ ("ins %0.d[0], %1.d[1]"
6102 : "=w"(result)
6103 : "w"(a)
6104 : /* No clobbers */);
6105 return result;
6108 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6109 vget_high_p16 (poly16x8_t a)
6111 poly16x4_t result;
6112 __asm__ ("ins %0.d[0], %1.d[1]"
6113 : "=w"(result)
6114 : "w"(a)
6115 : /* No clobbers */);
6116 return result;
6119 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6120 vget_high_s8 (int8x16_t a)
6122 int8x8_t result;
6123 __asm__ ("ins %0.d[0], %1.d[1]"
6124 : "=w"(result)
6125 : "w"(a)
6126 : /* No clobbers */);
6127 return result;
6130 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6131 vget_high_s16 (int16x8_t a)
6133 int16x4_t result;
6134 __asm__ ("ins %0.d[0], %1.d[1]"
6135 : "=w"(result)
6136 : "w"(a)
6137 : /* No clobbers */);
6138 return result;
6141 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6142 vget_high_s32 (int32x4_t a)
6144 int32x2_t result;
6145 __asm__ ("ins %0.d[0], %1.d[1]"
6146 : "=w"(result)
6147 : "w"(a)
6148 : /* No clobbers */);
6149 return result;
6152 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6153 vget_high_s64 (int64x2_t a)
6155 int64x1_t result;
6156 __asm__ ("ins %0.d[0], %1.d[1]"
6157 : "=w"(result)
6158 : "w"(a)
6159 : /* No clobbers */);
6160 return result;
6163 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6164 vget_high_u8 (uint8x16_t a)
6166 uint8x8_t result;
6167 __asm__ ("ins %0.d[0], %1.d[1]"
6168 : "=w"(result)
6169 : "w"(a)
6170 : /* No clobbers */);
6171 return result;
6174 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6175 vget_high_u16 (uint16x8_t a)
6177 uint16x4_t result;
6178 __asm__ ("ins %0.d[0], %1.d[1]"
6179 : "=w"(result)
6180 : "w"(a)
6181 : /* No clobbers */);
6182 return result;
6185 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6186 vget_high_u32 (uint32x4_t a)
6188 uint32x2_t result;
6189 __asm__ ("ins %0.d[0], %1.d[1]"
6190 : "=w"(result)
6191 : "w"(a)
6192 : /* No clobbers */);
6193 return result;
6196 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6197 vget_high_u64 (uint64x2_t a)
6199 uint64x1_t result;
6200 __asm__ ("ins %0.d[0], %1.d[1]"
6201 : "=w"(result)
6202 : "w"(a)
6203 : /* No clobbers */);
6204 return result;
6207 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6208 vhsub_s8 (int8x8_t a, int8x8_t b)
6210 int8x8_t result;
6211 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
6212 : "=w"(result)
6213 : "w"(a), "w"(b)
6214 : /* No clobbers */);
6215 return result;
6218 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6219 vhsub_s16 (int16x4_t a, int16x4_t b)
6221 int16x4_t result;
6222 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
6223 : "=w"(result)
6224 : "w"(a), "w"(b)
6225 : /* No clobbers */);
6226 return result;
6229 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6230 vhsub_s32 (int32x2_t a, int32x2_t b)
6232 int32x2_t result;
6233 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
6234 : "=w"(result)
6235 : "w"(a), "w"(b)
6236 : /* No clobbers */);
6237 return result;
6240 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6241 vhsub_u8 (uint8x8_t a, uint8x8_t b)
6243 uint8x8_t result;
6244 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
6245 : "=w"(result)
6246 : "w"(a), "w"(b)
6247 : /* No clobbers */);
6248 return result;
6251 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6252 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6254 uint16x4_t result;
6255 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6256 : "=w"(result)
6257 : "w"(a), "w"(b)
6258 : /* No clobbers */);
6259 return result;
6262 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6263 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6265 uint32x2_t result;
6266 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6267 : "=w"(result)
6268 : "w"(a), "w"(b)
6269 : /* No clobbers */);
6270 return result;
6273 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6274 vhsubq_s8 (int8x16_t a, int8x16_t b)
6276 int8x16_t result;
6277 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6278 : "=w"(result)
6279 : "w"(a), "w"(b)
6280 : /* No clobbers */);
6281 return result;
6284 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6285 vhsubq_s16 (int16x8_t a, int16x8_t b)
6287 int16x8_t result;
6288 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6289 : "=w"(result)
6290 : "w"(a), "w"(b)
6291 : /* No clobbers */);
6292 return result;
6295 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6296 vhsubq_s32 (int32x4_t a, int32x4_t b)
6298 int32x4_t result;
6299 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6300 : "=w"(result)
6301 : "w"(a), "w"(b)
6302 : /* No clobbers */);
6303 return result;
6306 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6307 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6309 uint8x16_t result;
6310 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6311 : "=w"(result)
6312 : "w"(a), "w"(b)
6313 : /* No clobbers */);
6314 return result;
6317 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6318 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6320 uint16x8_t result;
6321 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6322 : "=w"(result)
6323 : "w"(a), "w"(b)
6324 : /* No clobbers */);
6325 return result;
6328 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6329 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6331 uint32x4_t result;
6332 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6333 : "=w"(result)
6334 : "w"(a), "w"(b)
6335 : /* No clobbers */);
6336 return result;
6339 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6340 vld1_dup_f32 (const float32_t * a)
6342 float32x2_t result;
6343 __asm__ ("ld1r {%0.2s}, %1"
6344 : "=w"(result)
6345 : "Utv"(*a)
6346 : /* No clobbers */);
6347 return result;
6350 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6351 vld1_dup_f64 (const float64_t * a)
6353 float64x1_t result;
6354 __asm__ ("ld1r {%0.1d}, %1"
6355 : "=w"(result)
6356 : "Utv"(*a)
6357 : /* No clobbers */);
6358 return result;
6361 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6362 vld1_dup_p8 (const poly8_t * a)
6364 poly8x8_t result;
6365 __asm__ ("ld1r {%0.8b}, %1"
6366 : "=w"(result)
6367 : "Utv"(*a)
6368 : /* No clobbers */);
6369 return result;
6372 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6373 vld1_dup_p16 (const poly16_t * a)
6375 poly16x4_t result;
6376 __asm__ ("ld1r {%0.4h}, %1"
6377 : "=w"(result)
6378 : "Utv"(*a)
6379 : /* No clobbers */);
6380 return result;
6383 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6384 vld1_dup_s8 (const int8_t * a)
6386 int8x8_t result;
6387 __asm__ ("ld1r {%0.8b}, %1"
6388 : "=w"(result)
6389 : "Utv"(*a)
6390 : /* No clobbers */);
6391 return result;
6394 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6395 vld1_dup_s16 (const int16_t * a)
6397 int16x4_t result;
6398 __asm__ ("ld1r {%0.4h}, %1"
6399 : "=w"(result)
6400 : "Utv"(*a)
6401 : /* No clobbers */);
6402 return result;
6405 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6406 vld1_dup_s32 (const int32_t * a)
6408 int32x2_t result;
6409 __asm__ ("ld1r {%0.2s}, %1"
6410 : "=w"(result)
6411 : "Utv"(*a)
6412 : /* No clobbers */);
6413 return result;
6416 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6417 vld1_dup_s64 (const int64_t * a)
6419 int64x1_t result;
6420 __asm__ ("ld1r {%0.1d}, %1"
6421 : "=w"(result)
6422 : "Utv"(*a)
6423 : /* No clobbers */);
6424 return result;
6427 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6428 vld1_dup_u8 (const uint8_t * a)
6430 uint8x8_t result;
6431 __asm__ ("ld1r {%0.8b}, %1"
6432 : "=w"(result)
6433 : "Utv"(*a)
6434 : /* No clobbers */);
6435 return result;
6438 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6439 vld1_dup_u16 (const uint16_t * a)
6441 uint16x4_t result;
6442 __asm__ ("ld1r {%0.4h}, %1"
6443 : "=w"(result)
6444 : "Utv"(*a)
6445 : /* No clobbers */);
6446 return result;
6449 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6450 vld1_dup_u32 (const uint32_t * a)
6452 uint32x2_t result;
6453 __asm__ ("ld1r {%0.2s}, %1"
6454 : "=w"(result)
6455 : "Utv"(*a)
6456 : /* No clobbers */);
6457 return result;
6460 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6461 vld1_dup_u64 (const uint64_t * a)
6463 uint64x1_t result;
6464 __asm__ ("ld1r {%0.1d}, %1"
6465 : "=w"(result)
6466 : "Utv"(*a)
6467 : /* No clobbers */);
6468 return result;
/* Load a single element from memory into lane C of vector B (LD1 to a
   lane).  Macros (not inlines) because the lane index must be an
   immediate ("i" constraint) in the asm.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t __result;                                            \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t __result;                                            \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t __result;                                              \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t __result;                                             \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t __result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t __result;                                              \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t __result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t __result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t __result;                                              \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t __result;                                             \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t __result;                                             \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t __result;                                             \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
6627 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6628 vld1q_dup_f32 (const float32_t * a)
6630 float32x4_t result;
6631 __asm__ ("ld1r {%0.4s}, %1"
6632 : "=w"(result)
6633 : "Utv"(*a)
6634 : /* No clobbers */);
6635 return result;
6638 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6639 vld1q_dup_f64 (const float64_t * a)
6641 float64x2_t result;
6642 __asm__ ("ld1r {%0.2d}, %1"
6643 : "=w"(result)
6644 : "Utv"(*a)
6645 : /* No clobbers */);
6646 return result;
6649 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6650 vld1q_dup_p8 (const poly8_t * a)
6652 poly8x16_t result;
6653 __asm__ ("ld1r {%0.16b}, %1"
6654 : "=w"(result)
6655 : "Utv"(*a)
6656 : /* No clobbers */);
6657 return result;
6660 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6661 vld1q_dup_p16 (const poly16_t * a)
6663 poly16x8_t result;
6664 __asm__ ("ld1r {%0.8h}, %1"
6665 : "=w"(result)
6666 : "Utv"(*a)
6667 : /* No clobbers */);
6668 return result;
6671 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6672 vld1q_dup_s8 (const int8_t * a)
6674 int8x16_t result;
6675 __asm__ ("ld1r {%0.16b}, %1"
6676 : "=w"(result)
6677 : "Utv"(*a)
6678 : /* No clobbers */);
6679 return result;
6682 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6683 vld1q_dup_s16 (const int16_t * a)
6685 int16x8_t result;
6686 __asm__ ("ld1r {%0.8h}, %1"
6687 : "=w"(result)
6688 : "Utv"(*a)
6689 : /* No clobbers */);
6690 return result;
6693 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6694 vld1q_dup_s32 (const int32_t * a)
6696 int32x4_t result;
6697 __asm__ ("ld1r {%0.4s}, %1"
6698 : "=w"(result)
6699 : "Utv"(*a)
6700 : /* No clobbers */);
6701 return result;
6704 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6705 vld1q_dup_s64 (const int64_t * a)
6707 int64x2_t result;
6708 __asm__ ("ld1r {%0.2d}, %1"
6709 : "=w"(result)
6710 : "Utv"(*a)
6711 : /* No clobbers */);
6712 return result;
6715 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6716 vld1q_dup_u8 (const uint8_t * a)
6718 uint8x16_t result;
6719 __asm__ ("ld1r {%0.16b}, %1"
6720 : "=w"(result)
6721 : "Utv"(*a)
6722 : /* No clobbers */);
6723 return result;
6726 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6727 vld1q_dup_u16 (const uint16_t * a)
6729 uint16x8_t result;
6730 __asm__ ("ld1r {%0.8h}, %1"
6731 : "=w"(result)
6732 : "Utv"(*a)
6733 : /* No clobbers */);
6734 return result;
6737 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6738 vld1q_dup_u32 (const uint32_t * a)
6740 uint32x4_t result;
6741 __asm__ ("ld1r {%0.4s}, %1"
6742 : "=w"(result)
6743 : "Utv"(*a)
6744 : /* No clobbers */);
6745 return result;
6748 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6749 vld1q_dup_u64 (const uint64_t * a)
6751 uint64x2_t result;
6752 __asm__ ("ld1r {%0.2d}, %1"
6753 : "=w"(result)
6754 : "Utv"(*a)
6755 : /* No clobbers */);
6756 return result;
/* Load a single element from memory into lane C of 128-bit vector B.
   Macros so the lane index stays an "i" (immediate) operand.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t __result;                                            \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t __result;                                            \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t __result;                                             \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t __result;                                             \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t __result;                                              \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t __result;                                              \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t __result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t __result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t __result;                                             \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t __result;                                             \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t __result;                                             \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t __result;                                             \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(__result)                                        \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
6915 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6916 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6918 float32x2_t result;
6919 float32x2_t t1;
6920 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6921 : "=w"(result), "=w"(t1)
6922 : "0"(a), "w"(b), "w"(c)
6923 : /* No clobbers */);
6924 return result;
6927 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6928 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6930 int16x4_t result;
6931 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6932 : "=w"(result)
6933 : "0"(a), "w"(b), "x"(c)
6934 : /* No clobbers */);
6935 return result;
6938 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6939 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6941 int32x2_t result;
6942 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6943 : "=w"(result)
6944 : "0"(a), "w"(b), "w"(c)
6945 : /* No clobbers */);
6946 return result;
6949 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6950 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6952 uint16x4_t result;
6953 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6954 : "=w"(result)
6955 : "0"(a), "w"(b), "x"(c)
6956 : /* No clobbers */);
6957 return result;
6960 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6961 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6963 uint32x2_t result;
6964 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6965 : "=w"(result)
6966 : "0"(a), "w"(b), "w"(c)
6967 : /* No clobbers */);
6968 return result;
6971 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6972 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6974 int8x8_t result;
6975 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6976 : "=w"(result)
6977 : "0"(a), "w"(b), "w"(c)
6978 : /* No clobbers */);
6979 return result;
6982 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6983 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6985 int16x4_t result;
6986 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6987 : "=w"(result)
6988 : "0"(a), "w"(b), "w"(c)
6989 : /* No clobbers */);
6990 return result;
6993 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6994 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6996 int32x2_t result;
6997 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6998 : "=w"(result)
6999 : "0"(a), "w"(b), "w"(c)
7000 : /* No clobbers */);
7001 return result;
7004 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7005 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7007 uint8x8_t result;
7008 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7009 : "=w"(result)
7010 : "0"(a), "w"(b), "w"(c)
7011 : /* No clobbers */);
7012 return result;
7015 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7016 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7018 uint16x4_t result;
7019 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7020 : "=w"(result)
7021 : "0"(a), "w"(b), "w"(c)
7022 : /* No clobbers */);
7023 return result;
7026 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7027 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7029 uint32x2_t result;
7030 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7031 : "=w"(result)
7032 : "0"(a), "w"(b), "w"(c)
7033 : /* No clobbers */);
7034 return result;
/* Widening multiply-accumulate of the HIGH half by a selected lane
   (SMLAL2/UMLAL2).  Macros so the lane index is an immediate; 16-bit
   element operands use "x" (V0-V15) as required for .h lane indexing.  */

#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t __result;                                              \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t __result;                                              \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t __result;                                             \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t __result;                                             \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t __result;                                              \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t __result;                                              \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t __result;                                             \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t __result;                                             \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
7149 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7150 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7152 int32x4_t result;
7153 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
7154 : "=w"(result)
7155 : "0"(a), "w"(b), "x"(c)
7156 : /* No clobbers */);
7157 return result;
7160 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7161 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7163 int64x2_t result;
7164 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
7165 : "=w"(result)
7166 : "0"(a), "w"(b), "w"(c)
7167 : /* No clobbers */);
7168 return result;
7171 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7172 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7174 uint32x4_t result;
7175 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
7176 : "=w"(result)
7177 : "0"(a), "w"(b), "x"(c)
7178 : /* No clobbers */);
7179 return result;
7182 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7183 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7185 uint64x2_t result;
7186 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
7187 : "=w"(result)
7188 : "0"(a), "w"(b), "w"(c)
7189 : /* No clobbers */);
7190 return result;
7193 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7194 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7196 int16x8_t result;
7197 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
7198 : "=w"(result)
7199 : "0"(a), "w"(b), "w"(c)
7200 : /* No clobbers */);
7201 return result;
7204 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7205 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7207 int32x4_t result;
7208 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
7209 : "=w"(result)
7210 : "0"(a), "w"(b), "w"(c)
7211 : /* No clobbers */);
7212 return result;
7215 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7216 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7218 int64x2_t result;
7219 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
7220 : "=w"(result)
7221 : "0"(a), "w"(b), "w"(c)
7222 : /* No clobbers */);
7223 return result;
7226 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7227 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7229 uint16x8_t result;
7230 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
7231 : "=w"(result)
7232 : "0"(a), "w"(b), "w"(c)
7233 : /* No clobbers */);
7234 return result;
7237 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7238 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7240 uint32x4_t result;
7241 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
7242 : "=w"(result)
7243 : "0"(a), "w"(b), "w"(c)
7244 : /* No clobbers */);
7245 return result;
7248 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7249 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7251 uint64x2_t result;
7252 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
7253 : "=w"(result)
7254 : "0"(a), "w"(b), "w"(c)
7255 : /* No clobbers */);
7256 return result;
/* Widening multiply-accumulate of the LOW half by a selected lane
   (SMLAL/UMLAL).  _laneq forms index into a 128-bit lane vector.
   Macros so the lane index stays an immediate.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t __result;                                              \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t __result;                                              \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t __result;                                             \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t __result;                                             \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t __result;                                              \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t __result;                                              \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t __result;                                             \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t __result;                                             \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
7371 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7372 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7374 int32x4_t result;
7375 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7376 : "=w"(result)
7377 : "0"(a), "w"(b), "x"(c)
7378 : /* No clobbers */);
7379 return result;
7382 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7383 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7385 int64x2_t result;
7386 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7387 : "=w"(result)
7388 : "0"(a), "w"(b), "w"(c)
7389 : /* No clobbers */);
7390 return result;
7393 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7394 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7396 uint32x4_t result;
7397 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7398 : "=w"(result)
7399 : "0"(a), "w"(b), "x"(c)
7400 : /* No clobbers */);
7401 return result;
7404 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7405 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7407 uint64x2_t result;
7408 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7409 : "=w"(result)
7410 : "0"(a), "w"(b), "w"(c)
7411 : /* No clobbers */);
7412 return result;
7415 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7416 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7418 int16x8_t result;
7419 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7420 : "=w"(result)
7421 : "0"(a), "w"(b), "w"(c)
7422 : /* No clobbers */);
7423 return result;
7426 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7427 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7429 int32x4_t result;
7430 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7431 : "=w"(result)
7432 : "0"(a), "w"(b), "w"(c)
7433 : /* No clobbers */);
7434 return result;
7437 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7438 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7440 int64x2_t result;
7441 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7442 : "=w"(result)
7443 : "0"(a), "w"(b), "w"(c)
7444 : /* No clobbers */);
7445 return result;
7448 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7449 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7451 uint16x8_t result;
7452 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7453 : "=w"(result)
7454 : "0"(a), "w"(b), "w"(c)
7455 : /* No clobbers */);
7456 return result;
7459 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7460 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7462 uint32x4_t result;
7463 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7464 : "=w"(result)
7465 : "0"(a), "w"(b), "w"(c)
7466 : /* No clobbers */);
7467 return result;
7470 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7471 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7473 uint64x2_t result;
7474 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7475 : "=w"(result)
7476 : "0"(a), "w"(b), "w"(c)
7477 : /* No clobbers */);
7478 return result;
7481 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7482 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7484 float32x4_t result;
7485 float32x4_t t1;
7486 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7487 : "=w"(result), "=w"(t1)
7488 : "0"(a), "w"(b), "w"(c)
7489 : /* No clobbers */);
7490 return result;
7493 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7494 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7496 float64x2_t result;
7497 float64x2_t t1;
7498 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
7499 : "=w"(result), "=w"(t1)
7500 : "0"(a), "w"(b), "w"(c)
7501 : /* No clobbers */);
7502 return result;
7505 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7506 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7508 int16x8_t result;
7509 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7510 : "=w"(result)
7511 : "0"(a), "w"(b), "x"(c)
7512 : /* No clobbers */);
7513 return result;
7516 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7517 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7519 int32x4_t result;
7520 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7521 : "=w"(result)
7522 : "0"(a), "w"(b), "w"(c)
7523 : /* No clobbers */);
7524 return result;
7527 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7528 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7530 uint16x8_t result;
7531 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7532 : "=w"(result)
7533 : "0"(a), "w"(b), "x"(c)
7534 : /* No clobbers */);
7535 return result;
7538 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7539 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7541 uint32x4_t result;
7542 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7543 : "=w"(result)
7544 : "0"(a), "w"(b), "w"(c)
7545 : /* No clobbers */);
7546 return result;
7549 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7550 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7552 int8x16_t result;
7553 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7554 : "=w"(result)
7555 : "0"(a), "w"(b), "w"(c)
7556 : /* No clobbers */);
7557 return result;
7560 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7561 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7563 int16x8_t result;
7564 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7565 : "=w"(result)
7566 : "0"(a), "w"(b), "w"(c)
7567 : /* No clobbers */);
7568 return result;
7571 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7572 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7574 int32x4_t result;
7575 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7576 : "=w"(result)
7577 : "0"(a), "w"(b), "w"(c)
7578 : /* No clobbers */);
7579 return result;
7582 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7583 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7585 uint8x16_t result;
7586 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7587 : "=w"(result)
7588 : "0"(a), "w"(b), "w"(c)
7589 : /* No clobbers */);
7590 return result;
7593 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7594 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7596 uint16x8_t result;
7597 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7598 : "=w"(result)
7599 : "0"(a), "w"(b), "w"(c)
7600 : /* No clobbers */);
7601 return result;
7604 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7605 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7607 uint32x4_t result;
7608 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7609 : "=w"(result)
7610 : "0"(a), "w"(b), "w"(c)
7611 : /* No clobbers */);
7612 return result;
7615 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7616 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7618 float32x2_t result;
7619 float32x2_t t1;
7620 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7621 : "=w"(result), "=w"(t1)
7622 : "0"(a), "w"(b), "w"(c)
7623 : /* No clobbers */);
7624 return result;
7627 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7628 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7630 int16x4_t result;
7631 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7632 : "=w"(result)
7633 : "0"(a), "w"(b), "x"(c)
7634 : /* No clobbers */);
7635 return result;
7638 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7639 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7641 int32x2_t result;
7642 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7643 : "=w"(result)
7644 : "0"(a), "w"(b), "w"(c)
7645 : /* No clobbers */);
7646 return result;
7649 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7650 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7652 uint16x4_t result;
7653 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7654 : "=w"(result)
7655 : "0"(a), "w"(b), "x"(c)
7656 : /* No clobbers */);
7657 return result;
7660 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7661 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7663 uint32x2_t result;
7664 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7665 : "=w"(result)
7666 : "0"(a), "w"(b), "w"(c)
7667 : /* No clobbers */);
7668 return result;
7671 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7672 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7674 int8x8_t result;
7675 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7676 : "=w"(result)
7677 : "0"(a), "w"(b), "w"(c)
7678 : /* No clobbers */);
7679 return result;
7682 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7683 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7685 int16x4_t result;
7686 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7687 : "=w"(result)
7688 : "0"(a), "w"(b), "w"(c)
7689 : /* No clobbers */);
7690 return result;
7693 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7694 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7696 int32x2_t result;
7697 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7698 : "=w"(result)
7699 : "0"(a), "w"(b), "w"(c)
7700 : /* No clobbers */);
7701 return result;
7704 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7705 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7707 uint8x8_t result;
7708 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7709 : "=w"(result)
7710 : "0"(a), "w"(b), "w"(c)
7711 : /* No clobbers */);
7712 return result;
7715 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7716 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7718 uint16x4_t result;
7719 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7720 : "=w"(result)
7721 : "0"(a), "w"(b), "w"(c)
7722 : /* No clobbers */);
7723 return result;
7726 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7727 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7729 uint32x2_t result;
7730 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7731 : "=w"(result)
7732 : "0"(a), "w"(b), "w"(c)
7733 : /* No clobbers */);
7734 return result;
/* Widening multiply-subtract from the HIGH half of B by a selected
   lane of C: a - (b_hi * c[d]).  Macros because the lane index D must
   be a compile-time immediate ("i" constraint).  16-bit lane forms
   constrain C to "x" (V0-V15) as required by the indexed encoding.  */
#define vmlsl_high_lane_s16(a, b, c, d)                         \
  __extension__                                                 \
    ({                                                          \
       int16x8_t c_ = (c);                                      \
       int16x8_t b_ = (b);                                      \
       int32x4_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                         \
  __extension__                                                 \
    ({                                                          \
       int32x4_t c_ = (c);                                      \
       int32x4_t b_ = (b);                                      \
       int64x2_t a_ = (a);                                      \
       int64x2_t result;                                        \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                         \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t c_ = (c);                                     \
       uint16x8_t b_ = (b);                                     \
       uint32x4_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                         \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t c_ = (c);                                     \
       uint32x4_t b_ = (b);                                     \
       uint64x2_t a_ = (a);                                     \
       uint64x2_t result;                                       \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

/* _laneq variants: lane taken from a full 128-bit vector C.  */
#define vmlsl_high_laneq_s16(a, b, c, d)                        \
  __extension__                                                 \
    ({                                                          \
       int16x8_t c_ = (c);                                      \
       int16x8_t b_ = (b);                                      \
       int32x4_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                        \
  __extension__                                                 \
    ({                                                          \
       int32x4_t c_ = (c);                                      \
       int32x4_t b_ = (b);                                      \
       int64x2_t a_ = (a);                                      \
       int64x2_t result;                                        \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                        \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t c_ = (c);                                     \
       uint16x8_t b_ = (b);                                     \
       uint32x4_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                        \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t c_ = (c);                                     \
       uint32x4_t b_ = (b);                                     \
       uint64x2_t a_ = (a);                                     \
       uint64x2_t result;                                       \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                 \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })
7849 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7850 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7852 int32x4_t result;
7853 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7854 : "=w"(result)
7855 : "0"(a), "w"(b), "x"(c)
7856 : /* No clobbers */);
7857 return result;
7860 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7861 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7863 int64x2_t result;
7864 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7865 : "=w"(result)
7866 : "0"(a), "w"(b), "w"(c)
7867 : /* No clobbers */);
7868 return result;
7871 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7872 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7874 uint32x4_t result;
7875 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7876 : "=w"(result)
7877 : "0"(a), "w"(b), "x"(c)
7878 : /* No clobbers */);
7879 return result;
7882 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7883 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7885 uint64x2_t result;
7886 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7887 : "=w"(result)
7888 : "0"(a), "w"(b), "w"(c)
7889 : /* No clobbers */);
7890 return result;
7893 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7894 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7896 int16x8_t result;
7897 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7898 : "=w"(result)
7899 : "0"(a), "w"(b), "w"(c)
7900 : /* No clobbers */);
7901 return result;
7904 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7905 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7907 int32x4_t result;
7908 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7909 : "=w"(result)
7910 : "0"(a), "w"(b), "w"(c)
7911 : /* No clobbers */);
7912 return result;
7915 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7916 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7918 int64x2_t result;
7919 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7920 : "=w"(result)
7921 : "0"(a), "w"(b), "w"(c)
7922 : /* No clobbers */);
7923 return result;
7926 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7927 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7929 uint16x8_t result;
7930 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7931 : "=w"(result)
7932 : "0"(a), "w"(b), "w"(c)
7933 : /* No clobbers */);
7934 return result;
7937 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7938 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7940 uint32x4_t result;
7941 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7942 : "=w"(result)
7943 : "0"(a), "w"(b), "w"(c)
7944 : /* No clobbers */);
7945 return result;
7948 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7949 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7951 uint64x2_t result;
7952 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7953 : "=w"(result)
7954 : "0"(a), "w"(b), "w"(c)
7955 : /* No clobbers */);
7956 return result;
/* Widening multiply-subtract by a selected lane: a - (b * c[d]).
   Macros because D must be a compile-time immediate; 16-bit lane
   forms constrain C to "x" (V0-V15) per the indexed encoding.  */
#define vmlsl_lane_s16(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       int16x4_t c_ = (c);                                      \
       int16x4_t b_ = (b);                                      \
       int32x4_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_lane_s32(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       int32x2_t c_ = (c);                                      \
       int32x2_t b_ = (b);                                      \
       int64x2_t a_ = (a);                                      \
       int64x2_t result;                                        \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_lane_u16(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       uint16x4_t c_ = (c);                                     \
       uint16x4_t b_ = (b);                                     \
       uint32x4_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_lane_u32(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       uint32x2_t c_ = (c);                                     \
       uint32x2_t b_ = (b);                                     \
       uint64x2_t a_ = (a);                                     \
       uint64x2_t result;                                       \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

/* _laneq variants: lane taken from a full 128-bit vector C.  */
#define vmlsl_laneq_s16(a, b, c, d)                             \
  __extension__                                                 \
    ({                                                          \
       int16x8_t c_ = (c);                                      \
       int16x4_t b_ = (b);                                      \
       int32x4_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_laneq_s32(a, b, c, d)                             \
  __extension__                                                 \
    ({                                                          \
       int32x4_t c_ = (c);                                      \
       int32x2_t b_ = (b);                                      \
       int64x2_t a_ = (a);                                      \
       int64x2_t result;                                        \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_laneq_u16(a, b, c, d)                             \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t c_ = (c);                                     \
       uint16x4_t b_ = (b);                                     \
       uint32x4_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsl_laneq_u32(a, b, c, d)                             \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t c_ = (c);                                     \
       uint32x2_t b_ = (b);                                     \
       uint64x2_t a_ = (a);                                     \
       uint64x2_t result;                                       \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                  \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })
8071 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8072 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
8074 int32x4_t result;
8075 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
8076 : "=w"(result)
8077 : "0"(a), "w"(b), "x"(c)
8078 : /* No clobbers */);
8079 return result;
8082 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8083 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
8085 int64x2_t result;
8086 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
8087 : "=w"(result)
8088 : "0"(a), "w"(b), "w"(c)
8089 : /* No clobbers */);
8090 return result;
8093 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8094 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
8096 uint32x4_t result;
8097 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
8098 : "=w"(result)
8099 : "0"(a), "w"(b), "x"(c)
8100 : /* No clobbers */);
8101 return result;
8104 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8105 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
8107 uint64x2_t result;
8108 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
8109 : "=w"(result)
8110 : "0"(a), "w"(b), "w"(c)
8111 : /* No clobbers */);
8112 return result;
8115 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8116 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
8118 int16x8_t result;
8119 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
8120 : "=w"(result)
8121 : "0"(a), "w"(b), "w"(c)
8122 : /* No clobbers */);
8123 return result;
8126 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8127 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
8129 int32x4_t result;
8130 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
8131 : "=w"(result)
8132 : "0"(a), "w"(b), "w"(c)
8133 : /* No clobbers */);
8134 return result;
8137 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8138 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
8140 int64x2_t result;
8141 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
8142 : "=w"(result)
8143 : "0"(a), "w"(b), "w"(c)
8144 : /* No clobbers */);
8145 return result;
8148 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8149 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
8151 uint16x8_t result;
8152 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
8153 : "=w"(result)
8154 : "0"(a), "w"(b), "w"(c)
8155 : /* No clobbers */);
8156 return result;
8159 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8160 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
8162 uint32x4_t result;
8163 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
8164 : "=w"(result)
8165 : "0"(a), "w"(b), "w"(c)
8166 : /* No clobbers */);
8167 return result;
8170 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8171 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
8173 uint64x2_t result;
8174 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
8175 : "=w"(result)
8176 : "0"(a), "w"(b), "w"(c)
8177 : /* No clobbers */);
8178 return result;
8181 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8182 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
8184 float32x4_t result;
8185 float32x4_t t1;
8186 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
8187 : "=w"(result), "=w"(t1)
8188 : "0"(a), "w"(b), "w"(c)
8189 : /* No clobbers */);
8190 return result;
8193 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8194 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
8196 float64x2_t result;
8197 float64x2_t t1;
8198 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
8199 : "=w"(result), "=w"(t1)
8200 : "0"(a), "w"(b), "x"(c)
8201 : /* No clobbers */);
8202 return result;
8205 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8206 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
8208 int16x8_t result;
8209 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
8210 : "=w"(result)
8211 : "0"(a), "w"(b), "x"(c)
8212 : /* No clobbers */);
8213 return result;
8216 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8217 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
8219 int32x4_t result;
8220 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8221 : "=w"(result)
8222 : "0"(a), "w"(b), "w"(c)
8223 : /* No clobbers */);
8224 return result;
8227 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8228 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
8230 uint16x8_t result;
8231 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
8232 : "=w"(result)
8233 : "0"(a), "w"(b), "x"(c)
8234 : /* No clobbers */);
8235 return result;
8238 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8239 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
8241 uint32x4_t result;
8242 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
8243 : "=w"(result)
8244 : "0"(a), "w"(b), "w"(c)
8245 : /* No clobbers */);
8246 return result;
8249 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8250 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8252 int8x16_t result;
8253 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8254 : "=w"(result)
8255 : "0"(a), "w"(b), "w"(c)
8256 : /* No clobbers */);
8257 return result;
8260 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8261 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8263 int16x8_t result;
8264 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8265 : "=w"(result)
8266 : "0"(a), "w"(b), "w"(c)
8267 : /* No clobbers */);
8268 return result;
8271 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8272 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8274 int32x4_t result;
8275 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8276 : "=w"(result)
8277 : "0"(a), "w"(b), "w"(c)
8278 : /* No clobbers */);
8279 return result;
8282 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8283 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8285 uint8x16_t result;
8286 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8287 : "=w"(result)
8288 : "0"(a), "w"(b), "w"(c)
8289 : /* No clobbers */);
8290 return result;
8293 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8294 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8296 uint16x8_t result;
8297 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8298 : "=w"(result)
8299 : "0"(a), "w"(b), "w"(c)
8300 : /* No clobbers */);
8301 return result;
8304 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8305 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8307 uint32x4_t result;
8308 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8309 : "=w"(result)
8310 : "0"(a), "w"(b), "w"(c)
8311 : /* No clobbers */);
8312 return result;
8315 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8316 vmovl_high_s8 (int8x16_t a)
8318 int16x8_t result;
8319 __asm__ ("sshll2 %0.8h,%1.16b,#0"
8320 : "=w"(result)
8321 : "w"(a)
8322 : /* No clobbers */);
8323 return result;
8326 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8327 vmovl_high_s16 (int16x8_t a)
8329 int32x4_t result;
8330 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8331 : "=w"(result)
8332 : "w"(a)
8333 : /* No clobbers */);
8334 return result;
8337 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8338 vmovl_high_s32 (int32x4_t a)
8340 int64x2_t result;
8341 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8342 : "=w"(result)
8343 : "w"(a)
8344 : /* No clobbers */);
8345 return result;
8348 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8349 vmovl_high_u8 (uint8x16_t a)
8351 uint16x8_t result;
8352 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8353 : "=w"(result)
8354 : "w"(a)
8355 : /* No clobbers */);
8356 return result;
8359 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8360 vmovl_high_u16 (uint16x8_t a)
8362 uint32x4_t result;
8363 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8364 : "=w"(result)
8365 : "w"(a)
8366 : /* No clobbers */);
8367 return result;
8370 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8371 vmovl_high_u32 (uint32x4_t a)
8373 uint64x2_t result;
8374 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8375 : "=w"(result)
8376 : "w"(a)
8377 : /* No clobbers */);
8378 return result;
8381 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8382 vmovl_s8 (int8x8_t a)
8384 int16x8_t result;
8385 __asm__ ("sshll %0.8h,%1.8b,#0"
8386 : "=w"(result)
8387 : "w"(a)
8388 : /* No clobbers */);
8389 return result;
8392 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8393 vmovl_s16 (int16x4_t a)
8395 int32x4_t result;
8396 __asm__ ("sshll %0.4s,%1.4h,#0"
8397 : "=w"(result)
8398 : "w"(a)
8399 : /* No clobbers */);
8400 return result;
8403 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8404 vmovl_s32 (int32x2_t a)
8406 int64x2_t result;
8407 __asm__ ("sshll %0.2d,%1.2s,#0"
8408 : "=w"(result)
8409 : "w"(a)
8410 : /* No clobbers */);
8411 return result;
8414 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8415 vmovl_u8 (uint8x8_t a)
8417 uint16x8_t result;
8418 __asm__ ("ushll %0.8h,%1.8b,#0"
8419 : "=w"(result)
8420 : "w"(a)
8421 : /* No clobbers */);
8422 return result;
8425 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8426 vmovl_u16 (uint16x4_t a)
8428 uint32x4_t result;
8429 __asm__ ("ushll %0.4s,%1.4h,#0"
8430 : "=w"(result)
8431 : "w"(a)
8432 : /* No clobbers */);
8433 return result;
8436 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8437 vmovl_u32 (uint32x2_t a)
8439 uint64x2_t result;
8440 __asm__ ("ushll %0.2d,%1.2s,#0"
8441 : "=w"(result)
8442 : "w"(a)
8443 : /* No clobbers */);
8444 return result;
8447 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8448 vmovn_high_s16 (int8x8_t a, int16x8_t b)
8450 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8451 __asm__ ("xtn2 %0.16b,%1.8h"
8452 : "+w"(result)
8453 : "w"(b)
8454 : /* No clobbers */);
8455 return result;
8458 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8459 vmovn_high_s32 (int16x4_t a, int32x4_t b)
8461 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8462 __asm__ ("xtn2 %0.8h,%1.4s"
8463 : "+w"(result)
8464 : "w"(b)
8465 : /* No clobbers */);
8466 return result;
8469 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8470 vmovn_high_s64 (int32x2_t a, int64x2_t b)
8472 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8473 __asm__ ("xtn2 %0.4s,%1.2d"
8474 : "+w"(result)
8475 : "w"(b)
8476 : /* No clobbers */);
8477 return result;
8480 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8481 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8483 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8484 __asm__ ("xtn2 %0.16b,%1.8h"
8485 : "+w"(result)
8486 : "w"(b)
8487 : /* No clobbers */);
8488 return result;
/* Vector narrowing moves.

   The vmovn_high_* forms narrow B into the HIGH half of the result:
   the low half is pre-loaded from A via vcombine (high half zeroed
   via vcreate), and XTN2 then overwrites only the upper half — hence
   the read-write "+w" constraint on RESULT.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmovn_*: truncate each lane of A to half its width (XTN), returning
   a 64-bit vector.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmovn_s16 (int16x8_t a)
{
  int8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmovn_s32 (int32x4_t a)
{
  int16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmovn_s64 (int64x2_t a)
{
  int32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmovn_u16 (uint16x8_t a)
{
  uint8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmovn_u32 (uint32x4_t a)
{
  uint16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmovn_u64 (uint64x2_t a)
{
  uint32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Multiply vector by scalar: MUL/FMUL against element 0 of the scalar
   operand placed in a vector register.

   NOTE: the 16-bit variants use the "x" constraint on the scalar
   (registers V0-V15 only) because the by-element multiply encoding
   can only index into the lower half of the vector register file.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
  float32x2_t result;
  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t a, uint16_t b)
{
  uint16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t a, uint32_t b)
{
  uint32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar double multiply by lane C of vector B.  A macro (not a
   function) so that C can be emitted as the immediate lane index
   required by the "i" constraint.  */
#define vmuld_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmul %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Widening multiplies of the HIGH halves (SMULL2/UMULL2/PMULL2).

   The _lane_/_laneq_ forms are macros so the lane index C can be
   passed via the "i" (immediate) constraint.  The 16-bit by-element
   forms constrain the indexed operand with "x" (V0-V15), as required
   by the by-element encoding.  */
#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vmull_high_n_*: widening multiply of the high half of A by a scalar
   broadcast (element 0).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_n_s16 (int16x8_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_n_s32 (int32x4_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_n_u16 (uint16x8_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_n_u32 (uint32x4_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmull_high_*: element-wise widening multiply of the high halves of
   A and B (polynomial multiply for the p8 variant).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_high_p8 (poly8x16_t a, poly8x16_t b)
{
  poly16x8_t result;
  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Widening multiplies of the LOW halves (SMULL/UMULL/PMULL).

   _lane_ indexes a 64-bit vector, _laneq_ a 128-bit vector; both are
   macros so the lane number C can satisfy the "i" immediate
   constraint.  The 16-bit by-element operands use "x" (V0-V15), as
   required by the by-element encoding.  */
#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vmull_n_*: widening multiply by a scalar broadcast (element 0).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_n_s16 (int16x4_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_n_s32 (int32x2_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_n_u16 (uint16x4_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_n_u32 (uint32x2_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmull_*: element-wise widening multiply of two 64-bit vectors
   (polynomial multiply for the p8 variant).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_p8 (poly8x8_t a, poly8x8_t b)
{
  poly16x8_t result;
  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("smull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("umull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* 128-bit multiply-by-scalar (MUL/FMUL by element 0).  As elsewhere,
   the 16-bit scalar operands take the "x" (V0-V15) constraint for the
   by-element encoding.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t a, float32_t b)
{
  float32x4_t result;
  __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_n_f64 (float64x2_t a, float64_t b)
{
  float64x2_t result;
  __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_n_u16 (uint16x8_t a, uint16_t b)
{
  uint16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_n_u32 (uint32x4_t a, uint32_t b)
{
  uint32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar single-precision multiply by lane C of vector B; macro so C
   reaches the "i" immediate constraint.  */
#define vmuls_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmul %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* FMULX family: floating-point multiply-extended (vector, by-lane,
   and scalar forms).  The by-lane variants are macros so the lane
   index C satisfies the "i" immediate constraint.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmulx_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

#define vmulx_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmulxd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("fmulx %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

#define vmulxq_lane_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulxq_lane_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmulxs_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fmulx %s0, %s1, %s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Bitwise NOT (MVN).  The operation is lane-size agnostic, so every
   variant uses the byte arrangement (.8b / .16b) regardless of the
   element type.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmvn_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmvn_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmvn_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmvn_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmvn_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmvn_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmvnq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmvnq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmvnq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmvnq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmvnq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmvnq_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmvnq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Pairwise add-accumulate long (SADALP/UADALP): widen and pairwise-add
   the elements of B, accumulating into A.  The instruction reads and
   writes its destination, so A is tied to the output register with the
   "0" matching constraint.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8 (int16x4_t a, int8x8_t b)
{
  int16x4_t result;
  __asm__ ("sadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadal_s16 (int32x2_t a, int16x4_t b)
{
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Pairwise addition.  vpadd_f32 uses inline asm (FADDP); the integer
   64-bit forms go through the addp builtins, with casts for the
   unsigned variants since the builtins are typed on the signed
   modes.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("faddp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_addpv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_addpv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_addpv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

/* Scalar reduction: add the two double lanes of A (FADDP).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("faddp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Pairwise add long (SADDLP/UADDLP): widen and pairwise-add without
   accumulation.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* 128-bit pairwise addition (FADDP/ADDP), plus the scalar reduction
   vpadds_f32 which sums the two float lanes of a 64-bit vector.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("faddp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("faddp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddq_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("faddp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Pairwise maximum (FMAXP/SMAXP/UMAXP) on 64-bit vectors, and the
   pairwise maximum-number variant (FMAXNMP, NaN-aware per IEEE
   maxNum).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmax_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmax_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmax_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmax_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
9959 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9960 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
9962 float32x4_t result;
9963 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
9964 : "=w"(result)
9965 : "w"(a), "w"(b)
9966 : /* No clobbers */);
9967 return result;
9970 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9971 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
9973 float64x2_t result;
9974 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
9975 : "=w"(result)
9976 : "w"(a), "w"(b)
9977 : /* No clobbers */);
9978 return result;
9981 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9982 vpmaxnmqd_f64 (float64x2_t a)
9984 float64_t result;
9985 __asm__ ("fmaxnmp %d0,%1.2d"
9986 : "=w"(result)
9987 : "w"(a)
9988 : /* No clobbers */);
9989 return result;
9992 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9993 vpmaxnms_f32 (float32x2_t a)
9995 float32_t result;
9996 __asm__ ("fmaxnmp %s0,%1.2s"
9997 : "=w"(result)
9998 : "w"(a)
9999 : /* No clobbers */);
10000 return result;
10003 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10004 vpmaxq_f32 (float32x4_t a, float32x4_t b)
10006 float32x4_t result;
10007 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
10008 : "=w"(result)
10009 : "w"(a), "w"(b)
10010 : /* No clobbers */);
10011 return result;
10014 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10015 vpmaxq_f64 (float64x2_t a, float64x2_t b)
10017 float64x2_t result;
10018 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
10019 : "=w"(result)
10020 : "w"(a), "w"(b)
10021 : /* No clobbers */);
10022 return result;
10025 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10026 vpmaxq_s8 (int8x16_t a, int8x16_t b)
10028 int8x16_t result;
10029 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
10030 : "=w"(result)
10031 : "w"(a), "w"(b)
10032 : /* No clobbers */);
10033 return result;
10036 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10037 vpmaxq_s16 (int16x8_t a, int16x8_t b)
10039 int16x8_t result;
10040 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
10041 : "=w"(result)
10042 : "w"(a), "w"(b)
10043 : /* No clobbers */);
10044 return result;
10047 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10048 vpmaxq_s32 (int32x4_t a, int32x4_t b)
10050 int32x4_t result;
10051 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
10052 : "=w"(result)
10053 : "w"(a), "w"(b)
10054 : /* No clobbers */);
10055 return result;
10058 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10059 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
10061 uint8x16_t result;
10062 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
10063 : "=w"(result)
10064 : "w"(a), "w"(b)
10065 : /* No clobbers */);
10066 return result;
10069 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10070 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
10072 uint16x8_t result;
10073 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
10074 : "=w"(result)
10075 : "w"(a), "w"(b)
10076 : /* No clobbers */);
10077 return result;
10080 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10081 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
10083 uint32x4_t result;
10084 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
10085 : "=w"(result)
10086 : "w"(a), "w"(b)
10087 : /* No clobbers */);
10088 return result;
10091 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10092 vpmaxqd_f64 (float64x2_t a)
10094 float64_t result;
10095 __asm__ ("fmaxp %d0,%1.2d"
10096 : "=w"(result)
10097 : "w"(a)
10098 : /* No clobbers */);
10099 return result;
10102 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10103 vpmaxs_f32 (float32x2_t a)
10105 float32_t result;
10106 __asm__ ("fmaxp %s0,%1.2s"
10107 : "=w"(result)
10108 : "w"(a)
10109 : /* No clobbers */);
10110 return result;
10113 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10114 vpmin_f32 (float32x2_t a, float32x2_t b)
10116 float32x2_t result;
10117 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
10118 : "=w"(result)
10119 : "w"(a), "w"(b)
10120 : /* No clobbers */);
10121 return result;
10124 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10125 vpmin_s8 (int8x8_t a, int8x8_t b)
10127 int8x8_t result;
10128 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
10129 : "=w"(result)
10130 : "w"(a), "w"(b)
10131 : /* No clobbers */);
10132 return result;
10135 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10136 vpmin_s16 (int16x4_t a, int16x4_t b)
10138 int16x4_t result;
10139 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
10140 : "=w"(result)
10141 : "w"(a), "w"(b)
10142 : /* No clobbers */);
10143 return result;
10146 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10147 vpmin_s32 (int32x2_t a, int32x2_t b)
10149 int32x2_t result;
10150 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
10151 : "=w"(result)
10152 : "w"(a), "w"(b)
10153 : /* No clobbers */);
10154 return result;
10157 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10158 vpmin_u8 (uint8x8_t a, uint8x8_t b)
10160 uint8x8_t result;
10161 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
10162 : "=w"(result)
10163 : "w"(a), "w"(b)
10164 : /* No clobbers */);
10165 return result;
10168 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10169 vpmin_u16 (uint16x4_t a, uint16x4_t b)
10171 uint16x4_t result;
10172 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
10173 : "=w"(result)
10174 : "w"(a), "w"(b)
10175 : /* No clobbers */);
10176 return result;
10179 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10180 vpmin_u32 (uint32x2_t a, uint32x2_t b)
10182 uint32x2_t result;
10183 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
10184 : "=w"(result)
10185 : "w"(a), "w"(b)
10186 : /* No clobbers */);
10187 return result;
10190 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10191 vpminnm_f32 (float32x2_t a, float32x2_t b)
10193 float32x2_t result;
10194 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
10195 : "=w"(result)
10196 : "w"(a), "w"(b)
10197 : /* No clobbers */);
10198 return result;
10201 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10202 vpminnmq_f32 (float32x4_t a, float32x4_t b)
10204 float32x4_t result;
10205 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
10206 : "=w"(result)
10207 : "w"(a), "w"(b)
10208 : /* No clobbers */);
10209 return result;
10212 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10213 vpminnmq_f64 (float64x2_t a, float64x2_t b)
10215 float64x2_t result;
10216 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
10217 : "=w"(result)
10218 : "w"(a), "w"(b)
10219 : /* No clobbers */);
10220 return result;
10223 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10224 vpminnmqd_f64 (float64x2_t a)
10226 float64_t result;
10227 __asm__ ("fminnmp %d0,%1.2d"
10228 : "=w"(result)
10229 : "w"(a)
10230 : /* No clobbers */);
10231 return result;
10234 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10235 vpminnms_f32 (float32x2_t a)
10237 float32_t result;
10238 __asm__ ("fminnmp %s0,%1.2s"
10239 : "=w"(result)
10240 : "w"(a)
10241 : /* No clobbers */);
10242 return result;
10245 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10246 vpminq_f32 (float32x4_t a, float32x4_t b)
10248 float32x4_t result;
10249 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
10250 : "=w"(result)
10251 : "w"(a), "w"(b)
10252 : /* No clobbers */);
10253 return result;
10256 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10257 vpminq_f64 (float64x2_t a, float64x2_t b)
10259 float64x2_t result;
10260 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
10261 : "=w"(result)
10262 : "w"(a), "w"(b)
10263 : /* No clobbers */);
10264 return result;
10267 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10268 vpminq_s8 (int8x16_t a, int8x16_t b)
10270 int8x16_t result;
10271 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
10272 : "=w"(result)
10273 : "w"(a), "w"(b)
10274 : /* No clobbers */);
10275 return result;
10278 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10279 vpminq_s16 (int16x8_t a, int16x8_t b)
10281 int16x8_t result;
10282 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
10283 : "=w"(result)
10284 : "w"(a), "w"(b)
10285 : /* No clobbers */);
10286 return result;
10289 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10290 vpminq_s32 (int32x4_t a, int32x4_t b)
10292 int32x4_t result;
10293 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
10294 : "=w"(result)
10295 : "w"(a), "w"(b)
10296 : /* No clobbers */);
10297 return result;
10300 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10301 vpminq_u8 (uint8x16_t a, uint8x16_t b)
10303 uint8x16_t result;
10304 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
10305 : "=w"(result)
10306 : "w"(a), "w"(b)
10307 : /* No clobbers */);
10308 return result;
10311 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10312 vpminq_u16 (uint16x8_t a, uint16x8_t b)
10314 uint16x8_t result;
10315 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
10316 : "=w"(result)
10317 : "w"(a), "w"(b)
10318 : /* No clobbers */);
10319 return result;
10322 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10323 vpminq_u32 (uint32x4_t a, uint32x4_t b)
10325 uint32x4_t result;
10326 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
10327 : "=w"(result)
10328 : "w"(a), "w"(b)
10329 : /* No clobbers */);
10330 return result;
10333 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10334 vpminqd_f64 (float64x2_t a)
10336 float64_t result;
10337 __asm__ ("fminp %d0,%1.2d"
10338 : "=w"(result)
10339 : "w"(a)
10340 : /* No clobbers */);
10341 return result;
10344 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10345 vpmins_f32 (float32x2_t a)
10347 float32_t result;
10348 __asm__ ("fminp %s0,%1.2s"
10349 : "=w"(result)
10350 : "w"(a)
10351 : /* No clobbers */);
10352 return result;
10355 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10356 vqdmulh_n_s16 (int16x4_t a, int16_t b)
10358 int16x4_t result;
10359 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
10360 : "=w"(result)
10361 : "w"(a), "w"(b)
10362 : /* No clobbers */);
10363 return result;
10366 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10367 vqdmulh_n_s32 (int32x2_t a, int32_t b)
10369 int32x2_t result;
10370 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
10371 : "=w"(result)
10372 : "w"(a), "w"(b)
10373 : /* No clobbers */);
10374 return result;
10377 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10378 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
10380 int16x8_t result;
10381 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
10382 : "=w"(result)
10383 : "w"(a), "w"(b)
10384 : /* No clobbers */);
10385 return result;
10388 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10389 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
10391 int32x4_t result;
10392 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
10393 : "=w"(result)
10394 : "w"(a), "w"(b)
10395 : /* No clobbers */);
10396 return result;
10399 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10400 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
10402 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10403 __asm__ ("sqxtn2 %0.16b, %1.8h"
10404 : "+w"(result)
10405 : "w"(b)
10406 : /* No clobbers */);
10407 return result;
10410 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10411 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
10413 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10414 __asm__ ("sqxtn2 %0.8h, %1.4s"
10415 : "+w"(result)
10416 : "w"(b)
10417 : /* No clobbers */);
10418 return result;
10421 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10422 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
10424 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10425 __asm__ ("sqxtn2 %0.4s, %1.2d"
10426 : "+w"(result)
10427 : "w"(b)
10428 : /* No clobbers */);
10429 return result;
10432 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10433 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
10435 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10436 __asm__ ("uqxtn2 %0.16b, %1.8h"
10437 : "+w"(result)
10438 : "w"(b)
10439 : /* No clobbers */);
10440 return result;
10443 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10444 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
10446 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10447 __asm__ ("uqxtn2 %0.8h, %1.4s"
10448 : "+w"(result)
10449 : "w"(b)
10450 : /* No clobbers */);
10451 return result;
10454 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10455 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
10457 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10458 __asm__ ("uqxtn2 %0.4s, %1.2d"
10459 : "+w"(result)
10460 : "w"(b)
10461 : /* No clobbers */);
10462 return result;
10465 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10466 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
10468 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10469 __asm__ ("sqxtun2 %0.16b, %1.8h"
10470 : "+w"(result)
10471 : "w"(b)
10472 : /* No clobbers */);
10473 return result;
10476 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10477 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
10479 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10480 __asm__ ("sqxtun2 %0.8h, %1.4s"
10481 : "+w"(result)
10482 : "w"(b)
10483 : /* No clobbers */);
10484 return result;
10487 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10488 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
10490 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10491 __asm__ ("sqxtun2 %0.4s, %1.2d"
10492 : "+w"(result)
10493 : "w"(b)
10494 : /* No clobbers */);
10495 return result;
10498 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10499 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
10501 int16x4_t result;
10502 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
10503 : "=w"(result)
10504 : "w"(a), "x"(b)
10505 : /* No clobbers */);
10506 return result;
10509 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10510 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
10512 int32x2_t result;
10513 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
10514 : "=w"(result)
10515 : "w"(a), "w"(b)
10516 : /* No clobbers */);
10517 return result;
10520 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10521 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
10523 int16x8_t result;
10524 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
10525 : "=w"(result)
10526 : "w"(a), "x"(b)
10527 : /* No clobbers */);
10528 return result;
10531 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10532 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
10534 int32x4_t result;
10535 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
10536 : "=w"(result)
10537 : "w"(a), "w"(b)
10538 : /* No clobbers */);
10539 return result;
/* vqrshrn_high_n_<t>: saturating rounding shift right narrow by
   immediate, into the high half (SQRSHRN2 / UQRSHRN2).  Implemented as
   macros because the shift count `c' must be an assemble-time
   immediate ("i" constraint).  The low half of the result is seeded
   from `a'; the shrn2 instruction then fills the upper half from `b'.  */

#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vqrshrun_high_n_<t>: saturating rounding shift right unsigned narrow
   by immediate (signed input, unsigned narrowed output), into the high
   half (SQRSHRUN2).  Macros because `c' must be an immediate.  */

#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vqshrn_high_n_<t>: saturating (non-rounding) shift right narrow by
   immediate, into the high half (SQSHRN2 / UQSHRN2).  Macros because
   `c' must be an immediate.  */

#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vqshrun_high_n_<t>: saturating shift right unsigned narrow by
   immediate (signed input, unsigned narrowed output), into the high
   half (SQSHRUN2).  Macros because `c' must be an immediate.  */

#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10812 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10813 vrbit_s8 (int8x8_t a)
10815 int8x8_t result;
10816 __asm__ ("rbit %0.8b,%1.8b"
10817 : "=w"(result)
10818 : "w"(a)
10819 : /* No clobbers */);
10820 return result;
10823 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10824 vrbit_u8 (uint8x8_t a)
10826 uint8x8_t result;
10827 __asm__ ("rbit %0.8b,%1.8b"
10828 : "=w"(result)
10829 : "w"(a)
10830 : /* No clobbers */);
10831 return result;
10834 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10835 vrbitq_s8 (int8x16_t a)
10837 int8x16_t result;
10838 __asm__ ("rbit %0.16b,%1.16b"
10839 : "=w"(result)
10840 : "w"(a)
10841 : /* No clobbers */);
10842 return result;
10845 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10846 vrbitq_u8 (uint8x16_t a)
10848 uint8x16_t result;
10849 __asm__ ("rbit %0.16b,%1.16b"
10850 : "=w"(result)
10851 : "w"(a)
10852 : /* No clobbers */);
10853 return result;
10856 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10857 vrecpe_u32 (uint32x2_t a)
10859 uint32x2_t result;
10860 __asm__ ("urecpe %0.2s,%1.2s"
10861 : "=w"(result)
10862 : "w"(a)
10863 : /* No clobbers */);
10864 return result;
10867 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10868 vrecpeq_u32 (uint32x4_t a)
10870 uint32x4_t result;
10871 __asm__ ("urecpe %0.4s,%1.4s"
10872 : "=w"(result)
10873 : "w"(a)
10874 : /* No clobbers */);
10875 return result;
10878 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10879 vrev16_p8 (poly8x8_t a)
10881 poly8x8_t result;
10882 __asm__ ("rev16 %0.8b,%1.8b"
10883 : "=w"(result)
10884 : "w"(a)
10885 : /* No clobbers */);
10886 return result;
10889 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10890 vrev16_s8 (int8x8_t a)
10892 int8x8_t result;
10893 __asm__ ("rev16 %0.8b,%1.8b"
10894 : "=w"(result)
10895 : "w"(a)
10896 : /* No clobbers */);
10897 return result;
10900 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10901 vrev16_u8 (uint8x8_t a)
10903 uint8x8_t result;
10904 __asm__ ("rev16 %0.8b,%1.8b"
10905 : "=w"(result)
10906 : "w"(a)
10907 : /* No clobbers */);
10908 return result;
10911 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10912 vrev16q_p8 (poly8x16_t a)
10914 poly8x16_t result;
10915 __asm__ ("rev16 %0.16b,%1.16b"
10916 : "=w"(result)
10917 : "w"(a)
10918 : /* No clobbers */);
10919 return result;
10922 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10923 vrev16q_s8 (int8x16_t a)
10925 int8x16_t result;
10926 __asm__ ("rev16 %0.16b,%1.16b"
10927 : "=w"(result)
10928 : "w"(a)
10929 : /* No clobbers */);
10930 return result;
10933 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10934 vrev16q_u8 (uint8x16_t a)
10936 uint8x16_t result;
10937 __asm__ ("rev16 %0.16b,%1.16b"
10938 : "=w"(result)
10939 : "w"(a)
10940 : /* No clobbers */);
10941 return result;
10944 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10945 vrev32_p8 (poly8x8_t a)
10947 poly8x8_t result;
10948 __asm__ ("rev32 %0.8b,%1.8b"
10949 : "=w"(result)
10950 : "w"(a)
10951 : /* No clobbers */);
10952 return result;
10955 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
10956 vrev32_p16 (poly16x4_t a)
10958 poly16x4_t result;
10959 __asm__ ("rev32 %0.4h,%1.4h"
10960 : "=w"(result)
10961 : "w"(a)
10962 : /* No clobbers */);
10963 return result;
10966 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10967 vrev32_s8 (int8x8_t a)
10969 int8x8_t result;
10970 __asm__ ("rev32 %0.8b,%1.8b"
10971 : "=w"(result)
10972 : "w"(a)
10973 : /* No clobbers */);
10974 return result;
10977 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10978 vrev32_s16 (int16x4_t a)
10980 int16x4_t result;
10981 __asm__ ("rev32 %0.4h,%1.4h"
10982 : "=w"(result)
10983 : "w"(a)
10984 : /* No clobbers */);
10985 return result;
10988 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10989 vrev32_u8 (uint8x8_t a)
10991 uint8x8_t result;
10992 __asm__ ("rev32 %0.8b,%1.8b"
10993 : "=w"(result)
10994 : "w"(a)
10995 : /* No clobbers */);
10996 return result;
10999 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11000 vrev32_u16 (uint16x4_t a)
11002 uint16x4_t result;
11003 __asm__ ("rev32 %0.4h,%1.4h"
11004 : "=w"(result)
11005 : "w"(a)
11006 : /* No clobbers */);
11007 return result;
11010 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11011 vrev32q_p8 (poly8x16_t a)
11013 poly8x16_t result;
11014 __asm__ ("rev32 %0.16b,%1.16b"
11015 : "=w"(result)
11016 : "w"(a)
11017 : /* No clobbers */);
11018 return result;
11021 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
11022 vrev32q_p16 (poly16x8_t a)
11024 poly16x8_t result;
11025 __asm__ ("rev32 %0.8h,%1.8h"
11026 : "=w"(result)
11027 : "w"(a)
11028 : /* No clobbers */);
11029 return result;
11032 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11033 vrev32q_s8 (int8x16_t a)
11035 int8x16_t result;
11036 __asm__ ("rev32 %0.16b,%1.16b"
11037 : "=w"(result)
11038 : "w"(a)
11039 : /* No clobbers */);
11040 return result;
11043 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11044 vrev32q_s16 (int16x8_t a)
11046 int16x8_t result;
11047 __asm__ ("rev32 %0.8h,%1.8h"
11048 : "=w"(result)
11049 : "w"(a)
11050 : /* No clobbers */);
11051 return result;
11054 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11055 vrev32q_u8 (uint8x16_t a)
11057 uint8x16_t result;
11058 __asm__ ("rev32 %0.16b,%1.16b"
11059 : "=w"(result)
11060 : "w"(a)
11061 : /* No clobbers */);
11062 return result;
11065 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11066 vrev32q_u16 (uint16x8_t a)
11068 uint16x8_t result;
11069 __asm__ ("rev32 %0.8h,%1.8h"
11070 : "=w"(result)
11071 : "w"(a)
11072 : /* No clobbers */);
11073 return result;
11076 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11077 vrev64_f32 (float32x2_t a)
11079 float32x2_t result;
11080 __asm__ ("rev64 %0.2s,%1.2s"
11081 : "=w"(result)
11082 : "w"(a)
11083 : /* No clobbers */);
11084 return result;
11087 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11088 vrev64_p8 (poly8x8_t a)
11090 poly8x8_t result;
11091 __asm__ ("rev64 %0.8b,%1.8b"
11092 : "=w"(result)
11093 : "w"(a)
11094 : /* No clobbers */);
11095 return result;
11098 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
11099 vrev64_p16 (poly16x4_t a)
11101 poly16x4_t result;
11102 __asm__ ("rev64 %0.4h,%1.4h"
11103 : "=w"(result)
11104 : "w"(a)
11105 : /* No clobbers */);
11106 return result;
11109 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11110 vrev64_s8 (int8x8_t a)
11112 int8x8_t result;
11113 __asm__ ("rev64 %0.8b,%1.8b"
11114 : "=w"(result)
11115 : "w"(a)
11116 : /* No clobbers */);
11117 return result;
11120 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11121 vrev64_s16 (int16x4_t a)
11123 int16x4_t result;
11124 __asm__ ("rev64 %0.4h,%1.4h"
11125 : "=w"(result)
11126 : "w"(a)
11127 : /* No clobbers */);
11128 return result;
11131 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11132 vrev64_s32 (int32x2_t a)
11134 int32x2_t result;
11135 __asm__ ("rev64 %0.2s,%1.2s"
11136 : "=w"(result)
11137 : "w"(a)
11138 : /* No clobbers */);
11139 return result;
11142 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11143 vrev64_u8 (uint8x8_t a)
11145 uint8x8_t result;
11146 __asm__ ("rev64 %0.8b,%1.8b"
11147 : "=w"(result)
11148 : "w"(a)
11149 : /* No clobbers */);
11150 return result;
11153 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11154 vrev64_u16 (uint16x4_t a)
11156 uint16x4_t result;
11157 __asm__ ("rev64 %0.4h,%1.4h"
11158 : "=w"(result)
11159 : "w"(a)
11160 : /* No clobbers */);
11161 return result;
11164 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11165 vrev64_u32 (uint32x2_t a)
11167 uint32x2_t result;
11168 __asm__ ("rev64 %0.2s,%1.2s"
11169 : "=w"(result)
11170 : "w"(a)
11171 : /* No clobbers */);
11172 return result;
11175 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11176 vrev64q_f32 (float32x4_t a)
11178 float32x4_t result;
11179 __asm__ ("rev64 %0.4s,%1.4s"
11180 : "=w"(result)
11181 : "w"(a)
11182 : /* No clobbers */);
11183 return result;
11186 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11187 vrev64q_p8 (poly8x16_t a)
11189 poly8x16_t result;
11190 __asm__ ("rev64 %0.16b,%1.16b"
11191 : "=w"(result)
11192 : "w"(a)
11193 : /* No clobbers */);
11194 return result;
11197 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
11198 vrev64q_p16 (poly16x8_t a)
11200 poly16x8_t result;
11201 __asm__ ("rev64 %0.8h,%1.8h"
11202 : "=w"(result)
11203 : "w"(a)
11204 : /* No clobbers */);
11205 return result;
11208 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11209 vrev64q_s8 (int8x16_t a)
11211 int8x16_t result;
11212 __asm__ ("rev64 %0.16b,%1.16b"
11213 : "=w"(result)
11214 : "w"(a)
11215 : /* No clobbers */);
11216 return result;
11219 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11220 vrev64q_s16 (int16x8_t a)
11222 int16x8_t result;
11223 __asm__ ("rev64 %0.8h,%1.8h"
11224 : "=w"(result)
11225 : "w"(a)
11226 : /* No clobbers */);
11227 return result;
11230 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11231 vrev64q_s32 (int32x4_t a)
11233 int32x4_t result;
11234 __asm__ ("rev64 %0.4s,%1.4s"
11235 : "=w"(result)
11236 : "w"(a)
11237 : /* No clobbers */);
11238 return result;
11241 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11242 vrev64q_u8 (uint8x16_t a)
11244 uint8x16_t result;
11245 __asm__ ("rev64 %0.16b,%1.16b"
11246 : "=w"(result)
11247 : "w"(a)
11248 : /* No clobbers */);
11249 return result;
11252 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11253 vrev64q_u16 (uint16x8_t a)
11255 uint16x8_t result;
11256 __asm__ ("rev64 %0.8h,%1.8h"
11257 : "=w"(result)
11258 : "w"(a)
11259 : /* No clobbers */);
11260 return result;
11263 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11264 vrev64q_u32 (uint32x4_t a)
11266 uint32x4_t result;
11267 __asm__ ("rev64 %0.4s,%1.4s"
11268 : "=w"(result)
11269 : "w"(a)
11270 : /* No clobbers */);
11271 return result;
/* vrshrn_high_n_*: rounding shift right narrow into the high half
   (RSHRN2).  Implemented as macros because the shift amount must be a
   compile-time immediate ("i" constraint).  The "+w" tied operand seeds
   the destination with A in the low half; RSHRN2 then writes the
   narrowed B into the high half.  */

#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vrshrn_n_*: rounding shift right narrow (RSHRN).  Macros so the
   shift amount B can be passed as an immediate operand.  */

#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
11436 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11437 vrsqrte_f32 (float32x2_t a)
11439 float32x2_t result;
11440 __asm__ ("frsqrte %0.2s,%1.2s"
11441 : "=w"(result)
11442 : "w"(a)
11443 : /* No clobbers */);
11444 return result;
11447 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
11448 vrsqrte_f64 (float64x1_t a)
11450 float64x1_t result;
11451 __asm__ ("frsqrte %d0,%d1"
11452 : "=w"(result)
11453 : "w"(a)
11454 : /* No clobbers */);
11455 return result;
11458 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11459 vrsqrte_u32 (uint32x2_t a)
11461 uint32x2_t result;
11462 __asm__ ("ursqrte %0.2s,%1.2s"
11463 : "=w"(result)
11464 : "w"(a)
11465 : /* No clobbers */);
11466 return result;
11469 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11470 vrsqrted_f64 (float64_t a)
11472 float64_t result;
11473 __asm__ ("frsqrte %d0,%d1"
11474 : "=w"(result)
11475 : "w"(a)
11476 : /* No clobbers */);
11477 return result;
11480 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11481 vrsqrteq_f32 (float32x4_t a)
11483 float32x4_t result;
11484 __asm__ ("frsqrte %0.4s,%1.4s"
11485 : "=w"(result)
11486 : "w"(a)
11487 : /* No clobbers */);
11488 return result;
11491 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11492 vrsqrteq_f64 (float64x2_t a)
11494 float64x2_t result;
11495 __asm__ ("frsqrte %0.2d,%1.2d"
11496 : "=w"(result)
11497 : "w"(a)
11498 : /* No clobbers */);
11499 return result;
11502 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11503 vrsqrteq_u32 (uint32x4_t a)
11505 uint32x4_t result;
11506 __asm__ ("ursqrte %0.4s,%1.4s"
11507 : "=w"(result)
11508 : "w"(a)
11509 : /* No clobbers */);
11510 return result;
11513 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11514 vrsqrtes_f32 (float32_t a)
11516 float32_t result;
11517 __asm__ ("frsqrte %s0,%s1"
11518 : "=w"(result)
11519 : "w"(a)
11520 : /* No clobbers */);
11521 return result;
11524 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11525 vrsqrts_f32 (float32x2_t a, float32x2_t b)
11527 float32x2_t result;
11528 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
11529 : "=w"(result)
11530 : "w"(a), "w"(b)
11531 : /* No clobbers */);
11532 return result;
11535 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11536 vrsqrtsd_f64 (float64_t a, float64_t b)
11538 float64_t result;
11539 __asm__ ("frsqrts %d0,%d1,%d2"
11540 : "=w"(result)
11541 : "w"(a), "w"(b)
11542 : /* No clobbers */);
11543 return result;
11546 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11547 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
11549 float32x4_t result;
11550 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
11551 : "=w"(result)
11552 : "w"(a), "w"(b)
11553 : /* No clobbers */);
11554 return result;
11557 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11558 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
11560 float64x2_t result;
11561 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11562 : "=w"(result)
11563 : "w"(a), "w"(b)
11564 : /* No clobbers */);
11565 return result;
11568 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11569 vrsqrtss_f32 (float32_t a, float32_t b)
11571 float32_t result;
11572 __asm__ ("frsqrts %s0,%s1,%s2"
11573 : "=w"(result)
11574 : "w"(a), "w"(b)
11575 : /* No clobbers */);
11576 return result;
11579 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11580 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
11582 float64x2_t result;
11583 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11584 : "=w"(result)
11585 : "w"(a), "w"(b)
11586 : /* No clobbers */);
11587 return result;
11590 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11591 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11593 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11594 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11595 : "+w"(result)
11596 : "w"(b), "w"(c)
11597 : /* No clobbers */);
11598 return result;
11601 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11602 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11604 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11605 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11606 : "+w"(result)
11607 : "w"(b), "w"(c)
11608 : /* No clobbers */);
11609 return result;
11612 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11613 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11615 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11616 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11617 : "+w"(result)
11618 : "w"(b), "w"(c)
11619 : /* No clobbers */);
11620 return result;
11623 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11624 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11626 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11627 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11628 : "+w"(result)
11629 : "w"(b), "w"(c)
11630 : /* No clobbers */);
11631 return result;
11634 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11635 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11637 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11638 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11639 : "+w"(result)
11640 : "w"(b), "w"(c)
11641 : /* No clobbers */);
11642 return result;
11645 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11646 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11648 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11649 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11650 : "+w"(result)
11651 : "w"(b), "w"(c)
11652 : /* No clobbers */);
11653 return result;
11656 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11657 vrsubhn_s16 (int16x8_t a, int16x8_t b)
11659 int8x8_t result;
11660 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11661 : "=w"(result)
11662 : "w"(a), "w"(b)
11663 : /* No clobbers */);
11664 return result;
11667 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11668 vrsubhn_s32 (int32x4_t a, int32x4_t b)
11670 int16x4_t result;
11671 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11672 : "=w"(result)
11673 : "w"(a), "w"(b)
11674 : /* No clobbers */);
11675 return result;
11678 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11679 vrsubhn_s64 (int64x2_t a, int64x2_t b)
11681 int32x2_t result;
11682 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11683 : "=w"(result)
11684 : "w"(a), "w"(b)
11685 : /* No clobbers */);
11686 return result;
11689 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11690 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
11692 uint8x8_t result;
11693 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11694 : "=w"(result)
11695 : "w"(a), "w"(b)
11696 : /* No clobbers */);
11697 return result;
11700 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11701 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
11703 uint16x4_t result;
11704 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11705 : "=w"(result)
11706 : "w"(a), "w"(b)
11707 : /* No clobbers */);
11708 return result;
11711 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11712 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
11714 uint32x2_t result;
11715 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11716 : "=w"(result)
11717 : "w"(a), "w"(b)
11718 : /* No clobbers */);
11719 return result;
/* vset_lane_*: insert scalar A into lane C of 64-bit vector B (INS).
   Macros because the lane index must be an immediate.  The scalar is
   supplied in a general register ("r"): %w1 for 32-bit-or-narrower
   elements, %x1 for 64-bit elements.  The "0" constraint ties B to the
   output so the other lanes are preserved.  */

#define vset_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x1_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t a_ = (a);                                                \
       poly8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t a_ = (a);                                                 \
       int8x8_t result;                                                 \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x4_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x2_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x1_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t a_ = (a);                                                \
       uint8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x2_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x1_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsetq_lane_*: insert scalar A into lane C of 128-bit vector B (INS).
   Same operand conventions as the vset_lane_* macros.  */

#define vsetq_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t a_ = (a);                                                \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t a_ = (a);                                                 \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t a_ = (a);                                                \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vshrn_high_n_*: shift right narrow into the high half (SHRN2); the
   truncating (non-rounding) counterpart of vrshrn_high_n_*.  */

#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vshrn_n_*: shift right narrow (SHRN), truncating counterpart of
   vrshrn_n_*.  Macros so the shift amount is an immediate.  */

#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsli_n_p* / vsliq_n_p*: shift left and insert (SLI) for polynomial
   vectors.  A supplies the bits preserved below the inserted field, so
   it is tied to the output with "0".  */

#define vsli_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sli %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsli_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sli %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sli %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sli %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsri_n_p* / vsriq_n_p*: shift right and insert (SRI) for polynomial
   vectors; same operand scheme as the SLI macros above.  */

#define vsri_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sri %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sri %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sri %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sri %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vst1_lane_*: store lane C of 64-bit vector B to *A (ST1).  Macros
   because the lane index must be an immediate.  The asm has no output
   operands and declares a "memory" clobber for the store.  */

#define vst1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
/* vst1q_lane_*: store lane C of 128-bit vector B to *A (ST1).  Same
   conventions as the vst1_lane_* macros.  */

#define vst1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
12565 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12566 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
12568 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12569 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12570 : "+w"(result)
12571 : "w"(b), "w"(c)
12572 : /* No clobbers */);
12573 return result;
12576 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12577 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
12579 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
12580 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12581 : "+w"(result)
12582 : "w"(b), "w"(c)
12583 : /* No clobbers */);
12584 return result;
12587 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12588 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
12590 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
12591 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12592 : "+w"(result)
12593 : "w"(b), "w"(c)
12594 : /* No clobbers */);
12595 return result;
12598 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12599 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
12601 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12602 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12603 : "+w"(result)
12604 : "w"(b), "w"(c)
12605 : /* No clobbers */);
12606 return result;
12609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12610 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
12612 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
12613 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12614 : "+w"(result)
12615 : "w"(b), "w"(c)
12616 : /* No clobbers */);
12617 return result;
12620 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12621 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
12623 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
12624 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12625 : "+w"(result)
12626 : "w"(b), "w"(c)
12627 : /* No clobbers */);
12628 return result;
12631 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12632 vsubhn_s16 (int16x8_t a, int16x8_t b)
12634 int8x8_t result;
12635 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12636 : "=w"(result)
12637 : "w"(a), "w"(b)
12638 : /* No clobbers */);
12639 return result;
12642 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12643 vsubhn_s32 (int32x4_t a, int32x4_t b)
12645 int16x4_t result;
12646 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12647 : "=w"(result)
12648 : "w"(a), "w"(b)
12649 : /* No clobbers */);
12650 return result;
12653 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12654 vsubhn_s64 (int64x2_t a, int64x2_t b)
12656 int32x2_t result;
12657 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12658 : "=w"(result)
12659 : "w"(a), "w"(b)
12660 : /* No clobbers */);
12661 return result;
12664 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12665 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
12667 uint8x8_t result;
12668 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12669 : "=w"(result)
12670 : "w"(a), "w"(b)
12671 : /* No clobbers */);
12672 return result;
12675 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12676 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
12678 uint16x4_t result;
12679 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12680 : "=w"(result)
12681 : "w"(a), "w"(b)
12682 : /* No clobbers */);
12683 return result;
12686 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12687 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
12689 uint32x2_t result;
12690 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12691 : "=w"(result)
12692 : "w"(a), "w"(b)
12693 : /* No clobbers */);
12694 return result;
12697 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12698 vtrn1_f32 (float32x2_t a, float32x2_t b)
12700 float32x2_t result;
12701 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12702 : "=w"(result)
12703 : "w"(a), "w"(b)
12704 : /* No clobbers */);
12705 return result;
12708 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12709 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
12711 poly8x8_t result;
12712 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12713 : "=w"(result)
12714 : "w"(a), "w"(b)
12715 : /* No clobbers */);
12716 return result;
12719 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12720 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
12722 poly16x4_t result;
12723 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12724 : "=w"(result)
12725 : "w"(a), "w"(b)
12726 : /* No clobbers */);
12727 return result;
12730 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12731 vtrn1_s8 (int8x8_t a, int8x8_t b)
12733 int8x8_t result;
12734 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12735 : "=w"(result)
12736 : "w"(a), "w"(b)
12737 : /* No clobbers */);
12738 return result;
12741 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12742 vtrn1_s16 (int16x4_t a, int16x4_t b)
12744 int16x4_t result;
12745 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12746 : "=w"(result)
12747 : "w"(a), "w"(b)
12748 : /* No clobbers */);
12749 return result;
12752 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12753 vtrn1_s32 (int32x2_t a, int32x2_t b)
12755 int32x2_t result;
12756 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12757 : "=w"(result)
12758 : "w"(a), "w"(b)
12759 : /* No clobbers */);
12760 return result;
12763 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12764 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
12766 uint8x8_t result;
12767 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12768 : "=w"(result)
12769 : "w"(a), "w"(b)
12770 : /* No clobbers */);
12771 return result;
12774 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12775 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
12777 uint16x4_t result;
12778 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12779 : "=w"(result)
12780 : "w"(a), "w"(b)
12781 : /* No clobbers */);
12782 return result;
12785 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12786 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
12788 uint32x2_t result;
12789 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12790 : "=w"(result)
12791 : "w"(a), "w"(b)
12792 : /* No clobbers */);
12793 return result;
12796 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12797 vtrn1q_f32 (float32x4_t a, float32x4_t b)
12799 float32x4_t result;
12800 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12801 : "=w"(result)
12802 : "w"(a), "w"(b)
12803 : /* No clobbers */);
12804 return result;
12807 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12808 vtrn1q_f64 (float64x2_t a, float64x2_t b)
12810 float64x2_t result;
12811 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12812 : "=w"(result)
12813 : "w"(a), "w"(b)
12814 : /* No clobbers */);
12815 return result;
12818 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12819 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
12821 poly8x16_t result;
12822 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12823 : "=w"(result)
12824 : "w"(a), "w"(b)
12825 : /* No clobbers */);
12826 return result;
12829 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12830 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
12832 poly16x8_t result;
12833 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12834 : "=w"(result)
12835 : "w"(a), "w"(b)
12836 : /* No clobbers */);
12837 return result;
12840 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12841 vtrn1q_s8 (int8x16_t a, int8x16_t b)
12843 int8x16_t result;
12844 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12845 : "=w"(result)
12846 : "w"(a), "w"(b)
12847 : /* No clobbers */);
12848 return result;
12851 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12852 vtrn1q_s16 (int16x8_t a, int16x8_t b)
12854 int16x8_t result;
12855 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12856 : "=w"(result)
12857 : "w"(a), "w"(b)
12858 : /* No clobbers */);
12859 return result;
12862 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12863 vtrn1q_s32 (int32x4_t a, int32x4_t b)
12865 int32x4_t result;
12866 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12867 : "=w"(result)
12868 : "w"(a), "w"(b)
12869 : /* No clobbers */);
12870 return result;
12873 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12874 vtrn1q_s64 (int64x2_t a, int64x2_t b)
12876 int64x2_t result;
12877 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12878 : "=w"(result)
12879 : "w"(a), "w"(b)
12880 : /* No clobbers */);
12881 return result;
12884 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12885 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
12887 uint8x16_t result;
12888 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12889 : "=w"(result)
12890 : "w"(a), "w"(b)
12891 : /* No clobbers */);
12892 return result;
12895 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12896 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
12898 uint16x8_t result;
12899 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12900 : "=w"(result)
12901 : "w"(a), "w"(b)
12902 : /* No clobbers */);
12903 return result;
12906 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12907 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
12909 uint32x4_t result;
12910 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12911 : "=w"(result)
12912 : "w"(a), "w"(b)
12913 : /* No clobbers */);
12914 return result;
12917 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12918 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
12920 uint64x2_t result;
12921 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12922 : "=w"(result)
12923 : "w"(a), "w"(b)
12924 : /* No clobbers */);
12925 return result;
12928 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12929 vtrn2_f32 (float32x2_t a, float32x2_t b)
12931 float32x2_t result;
12932 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12933 : "=w"(result)
12934 : "w"(a), "w"(b)
12935 : /* No clobbers */);
12936 return result;
12939 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12940 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
12942 poly8x8_t result;
12943 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12944 : "=w"(result)
12945 : "w"(a), "w"(b)
12946 : /* No clobbers */);
12947 return result;
12950 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12951 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
12953 poly16x4_t result;
12954 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12955 : "=w"(result)
12956 : "w"(a), "w"(b)
12957 : /* No clobbers */);
12958 return result;
12961 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12962 vtrn2_s8 (int8x8_t a, int8x8_t b)
12964 int8x8_t result;
12965 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12966 : "=w"(result)
12967 : "w"(a), "w"(b)
12968 : /* No clobbers */);
12969 return result;
12972 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12973 vtrn2_s16 (int16x4_t a, int16x4_t b)
12975 int16x4_t result;
12976 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12977 : "=w"(result)
12978 : "w"(a), "w"(b)
12979 : /* No clobbers */);
12980 return result;
12983 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12984 vtrn2_s32 (int32x2_t a, int32x2_t b)
12986 int32x2_t result;
12987 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12988 : "=w"(result)
12989 : "w"(a), "w"(b)
12990 : /* No clobbers */);
12991 return result;
12994 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12995 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
12997 uint8x8_t result;
12998 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12999 : "=w"(result)
13000 : "w"(a), "w"(b)
13001 : /* No clobbers */);
13002 return result;
13005 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13006 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
13008 uint16x4_t result;
13009 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
13010 : "=w"(result)
13011 : "w"(a), "w"(b)
13012 : /* No clobbers */);
13013 return result;
13016 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13017 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
13019 uint32x2_t result;
13020 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
13021 : "=w"(result)
13022 : "w"(a), "w"(b)
13023 : /* No clobbers */);
13024 return result;
13027 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13028 vtrn2q_f32 (float32x4_t a, float32x4_t b)
13030 float32x4_t result;
13031 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
13032 : "=w"(result)
13033 : "w"(a), "w"(b)
13034 : /* No clobbers */);
13035 return result;
13038 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13039 vtrn2q_f64 (float64x2_t a, float64x2_t b)
13041 float64x2_t result;
13042 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
13043 : "=w"(result)
13044 : "w"(a), "w"(b)
13045 : /* No clobbers */);
13046 return result;
13049 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13050 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
13052 poly8x16_t result;
13053 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
13054 : "=w"(result)
13055 : "w"(a), "w"(b)
13056 : /* No clobbers */);
13057 return result;
13060 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13061 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
13063 poly16x8_t result;
13064 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
13065 : "=w"(result)
13066 : "w"(a), "w"(b)
13067 : /* No clobbers */);
13068 return result;
13071 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13072 vtrn2q_s8 (int8x16_t a, int8x16_t b)
13074 int8x16_t result;
13075 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
13076 : "=w"(result)
13077 : "w"(a), "w"(b)
13078 : /* No clobbers */);
13079 return result;
13082 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13083 vtrn2q_s16 (int16x8_t a, int16x8_t b)
13085 int16x8_t result;
13086 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
13087 : "=w"(result)
13088 : "w"(a), "w"(b)
13089 : /* No clobbers */);
13090 return result;
13093 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13094 vtrn2q_s32 (int32x4_t a, int32x4_t b)
13096 int32x4_t result;
13097 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
13098 : "=w"(result)
13099 : "w"(a), "w"(b)
13100 : /* No clobbers */);
13101 return result;
13104 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13105 vtrn2q_s64 (int64x2_t a, int64x2_t b)
13107 int64x2_t result;
13108 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
13109 : "=w"(result)
13110 : "w"(a), "w"(b)
13111 : /* No clobbers */);
13112 return result;
13115 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13116 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
13118 uint8x16_t result;
13119 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
13120 : "=w"(result)
13121 : "w"(a), "w"(b)
13122 : /* No clobbers */);
13123 return result;
13126 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13127 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
13129 uint16x8_t result;
13130 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
13131 : "=w"(result)
13132 : "w"(a), "w"(b)
13133 : /* No clobbers */);
13134 return result;
13137 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13138 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
13140 uint32x4_t result;
13141 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
13142 : "=w"(result)
13143 : "w"(a), "w"(b)
13144 : /* No clobbers */);
13145 return result;
13148 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13149 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
13151 uint64x2_t result;
13152 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
13153 : "=w"(result)
13154 : "w"(a), "w"(b)
13155 : /* No clobbers */);
13156 return result;
13159 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13160 vtst_p8 (poly8x8_t a, poly8x8_t b)
13162 uint8x8_t result;
13163 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
13164 : "=w"(result)
13165 : "w"(a), "w"(b)
13166 : /* No clobbers */);
13167 return result;
13170 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13171 vtst_p16 (poly16x4_t a, poly16x4_t b)
13173 uint16x4_t result;
13174 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
13175 : "=w"(result)
13176 : "w"(a), "w"(b)
13177 : /* No clobbers */);
13178 return result;
13181 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13182 vtstq_p8 (poly8x16_t a, poly8x16_t b)
13184 uint8x16_t result;
13185 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
13186 : "=w"(result)
13187 : "w"(a), "w"(b)
13188 : /* No clobbers */);
13189 return result;
13192 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13193 vtstq_p16 (poly16x8_t a, poly16x8_t b)
13195 uint16x8_t result;
13196 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
13197 : "=w"(result)
13198 : "w"(a), "w"(b)
13199 : /* No clobbers */);
13200 return result;
13202 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13203 vuzp1_f32 (float32x2_t a, float32x2_t b)
13205 float32x2_t result;
13206 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13207 : "=w"(result)
13208 : "w"(a), "w"(b)
13209 : /* No clobbers */);
13210 return result;
13213 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13214 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
13216 poly8x8_t result;
13217 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13218 : "=w"(result)
13219 : "w"(a), "w"(b)
13220 : /* No clobbers */);
13221 return result;
13224 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13225 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
13227 poly16x4_t result;
13228 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13229 : "=w"(result)
13230 : "w"(a), "w"(b)
13231 : /* No clobbers */);
13232 return result;
13235 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13236 vuzp1_s8 (int8x8_t a, int8x8_t b)
13238 int8x8_t result;
13239 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13240 : "=w"(result)
13241 : "w"(a), "w"(b)
13242 : /* No clobbers */);
13243 return result;
13246 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13247 vuzp1_s16 (int16x4_t a, int16x4_t b)
13249 int16x4_t result;
13250 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13251 : "=w"(result)
13252 : "w"(a), "w"(b)
13253 : /* No clobbers */);
13254 return result;
13257 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13258 vuzp1_s32 (int32x2_t a, int32x2_t b)
13260 int32x2_t result;
13261 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13262 : "=w"(result)
13263 : "w"(a), "w"(b)
13264 : /* No clobbers */);
13265 return result;
13268 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13269 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
13271 uint8x8_t result;
13272 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13273 : "=w"(result)
13274 : "w"(a), "w"(b)
13275 : /* No clobbers */);
13276 return result;
13279 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13280 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
13282 uint16x4_t result;
13283 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13284 : "=w"(result)
13285 : "w"(a), "w"(b)
13286 : /* No clobbers */);
13287 return result;
13290 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13291 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
13293 uint32x2_t result;
13294 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13295 : "=w"(result)
13296 : "w"(a), "w"(b)
13297 : /* No clobbers */);
13298 return result;
13301 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13302 vuzp1q_f32 (float32x4_t a, float32x4_t b)
13304 float32x4_t result;
13305 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13306 : "=w"(result)
13307 : "w"(a), "w"(b)
13308 : /* No clobbers */);
13309 return result;
13312 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13313 vuzp1q_f64 (float64x2_t a, float64x2_t b)
13315 float64x2_t result;
13316 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13317 : "=w"(result)
13318 : "w"(a), "w"(b)
13319 : /* No clobbers */);
13320 return result;
13323 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13324 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
13326 poly8x16_t result;
13327 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13328 : "=w"(result)
13329 : "w"(a), "w"(b)
13330 : /* No clobbers */);
13331 return result;
13334 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13335 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
13337 poly16x8_t result;
13338 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13339 : "=w"(result)
13340 : "w"(a), "w"(b)
13341 : /* No clobbers */);
13342 return result;
13345 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13346 vuzp1q_s8 (int8x16_t a, int8x16_t b)
13348 int8x16_t result;
13349 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13350 : "=w"(result)
13351 : "w"(a), "w"(b)
13352 : /* No clobbers */);
13353 return result;
13356 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13357 vuzp1q_s16 (int16x8_t a, int16x8_t b)
13359 int16x8_t result;
13360 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13361 : "=w"(result)
13362 : "w"(a), "w"(b)
13363 : /* No clobbers */);
13364 return result;
13367 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13368 vuzp1q_s32 (int32x4_t a, int32x4_t b)
13370 int32x4_t result;
13371 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13372 : "=w"(result)
13373 : "w"(a), "w"(b)
13374 : /* No clobbers */);
13375 return result;
13378 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13379 vuzp1q_s64 (int64x2_t a, int64x2_t b)
13381 int64x2_t result;
13382 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13383 : "=w"(result)
13384 : "w"(a), "w"(b)
13385 : /* No clobbers */);
13386 return result;
13389 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13390 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
13392 uint8x16_t result;
13393 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13394 : "=w"(result)
13395 : "w"(a), "w"(b)
13396 : /* No clobbers */);
13397 return result;
13400 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13401 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
13403 uint16x8_t result;
13404 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13405 : "=w"(result)
13406 : "w"(a), "w"(b)
13407 : /* No clobbers */);
13408 return result;
13411 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13412 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
13414 uint32x4_t result;
13415 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13416 : "=w"(result)
13417 : "w"(a), "w"(b)
13418 : /* No clobbers */);
13419 return result;
13422 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13423 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
13425 uint64x2_t result;
13426 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13427 : "=w"(result)
13428 : "w"(a), "w"(b)
13429 : /* No clobbers */);
13430 return result;
13433 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13434 vuzp2_f32 (float32x2_t a, float32x2_t b)
13436 float32x2_t result;
13437 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13438 : "=w"(result)
13439 : "w"(a), "w"(b)
13440 : /* No clobbers */);
13441 return result;
13444 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13445 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
13447 poly8x8_t result;
13448 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13449 : "=w"(result)
13450 : "w"(a), "w"(b)
13451 : /* No clobbers */);
13452 return result;
13455 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13456 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
13458 poly16x4_t result;
13459 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13460 : "=w"(result)
13461 : "w"(a), "w"(b)
13462 : /* No clobbers */);
13463 return result;
13466 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13467 vuzp2_s8 (int8x8_t a, int8x8_t b)
13469 int8x8_t result;
13470 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13471 : "=w"(result)
13472 : "w"(a), "w"(b)
13473 : /* No clobbers */);
13474 return result;
13477 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13478 vuzp2_s16 (int16x4_t a, int16x4_t b)
13480 int16x4_t result;
13481 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13482 : "=w"(result)
13483 : "w"(a), "w"(b)
13484 : /* No clobbers */);
13485 return result;
13488 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13489 vuzp2_s32 (int32x2_t a, int32x2_t b)
13491 int32x2_t result;
13492 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13493 : "=w"(result)
13494 : "w"(a), "w"(b)
13495 : /* No clobbers */);
13496 return result;
13499 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13500 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
13502 uint8x8_t result;
13503 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13504 : "=w"(result)
13505 : "w"(a), "w"(b)
13506 : /* No clobbers */);
13507 return result;
13510 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13511 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
13513 uint16x4_t result;
13514 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13515 : "=w"(result)
13516 : "w"(a), "w"(b)
13517 : /* No clobbers */);
13518 return result;
13521 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13522 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
13524 uint32x2_t result;
13525 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13526 : "=w"(result)
13527 : "w"(a), "w"(b)
13528 : /* No clobbers */);
13529 return result;
13532 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13533 vuzp2q_f32 (float32x4_t a, float32x4_t b)
13535 float32x4_t result;
13536 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13537 : "=w"(result)
13538 : "w"(a), "w"(b)
13539 : /* No clobbers */);
13540 return result;
13543 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13544 vuzp2q_f64 (float64x2_t a, float64x2_t b)
13546 float64x2_t result;
13547 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13548 : "=w"(result)
13549 : "w"(a), "w"(b)
13550 : /* No clobbers */);
13551 return result;
13554 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13555 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
13557 poly8x16_t result;
13558 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13559 : "=w"(result)
13560 : "w"(a), "w"(b)
13561 : /* No clobbers */);
13562 return result;
13565 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13566 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
13568 poly16x8_t result;
13569 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13570 : "=w"(result)
13571 : "w"(a), "w"(b)
13572 : /* No clobbers */);
13573 return result;
13576 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13577 vuzp2q_s8 (int8x16_t a, int8x16_t b)
13579 int8x16_t result;
13580 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13581 : "=w"(result)
13582 : "w"(a), "w"(b)
13583 : /* No clobbers */);
13584 return result;
13587 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13588 vuzp2q_s16 (int16x8_t a, int16x8_t b)
13590 int16x8_t result;
13591 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13592 : "=w"(result)
13593 : "w"(a), "w"(b)
13594 : /* No clobbers */);
13595 return result;
13598 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13599 vuzp2q_s32 (int32x4_t a, int32x4_t b)
13601 int32x4_t result;
13602 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13603 : "=w"(result)
13604 : "w"(a), "w"(b)
13605 : /* No clobbers */);
13606 return result;
13609 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13610 vuzp2q_s64 (int64x2_t a, int64x2_t b)
13612 int64x2_t result;
13613 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13614 : "=w"(result)
13615 : "w"(a), "w"(b)
13616 : /* No clobbers */);
13617 return result;
13620 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13621 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
13623 uint8x16_t result;
13624 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13625 : "=w"(result)
13626 : "w"(a), "w"(b)
13627 : /* No clobbers */);
13628 return result;
13631 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13632 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
13634 uint16x8_t result;
13635 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13636 : "=w"(result)
13637 : "w"(a), "w"(b)
13638 : /* No clobbers */);
13639 return result;
13642 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13643 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
13645 uint32x4_t result;
13646 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13647 : "=w"(result)
13648 : "w"(a), "w"(b)
13649 : /* No clobbers */);
13650 return result;
13653 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13654 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
13656 uint64x2_t result;
13657 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13658 : "=w"(result)
13659 : "w"(a), "w"(b)
13660 : /* No clobbers */);
13661 return result;
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   This is required for casting memory reference.  */
/* __STRUCTN (t, sz, nelem): define a plain aggregate type t<sz>x<nelem>_t
   holding NELEM elements of type t<sz>_t.  These types exist solely so
   the memory operands of the ld2r/ld3r/ld4r asm below can be declared
   with the exact size of the data being read; the tables in the comment
   above record which combinations must be created here (combinations
   marked 'N' reuse an existing vector type of the same size instead).  */
13713 #define __STRUCTN(t, sz, nelem) \
13714 typedef struct t ## sz ## x ## nelem ## _t { \
13715 t ## sz ## _t val[nelem]; \
13716 } t ## sz ## x ## nelem ## _t;
13718 /* 2-element structs. */
13719 __STRUCTN (int, 8, 2)
13720 __STRUCTN (int, 16, 2)
13721 __STRUCTN (uint, 8, 2)
13722 __STRUCTN (uint, 16, 2)
13723 __STRUCTN (poly, 8, 2)
13724 __STRUCTN (poly, 16, 2)
13725 /* 3-element structs. */
13726 __STRUCTN (int, 8, 3)
13727 __STRUCTN (int, 16, 3)
13728 __STRUCTN (int, 32, 3)
13729 __STRUCTN (int, 64, 3)
13730 __STRUCTN (uint, 8, 3)
13731 __STRUCTN (uint, 16, 3)
13732 __STRUCTN (uint, 32, 3)
13733 __STRUCTN (uint, 64, 3)
13734 __STRUCTN (float, 32, 3)
13735 __STRUCTN (float, 64, 3)
13736 __STRUCTN (poly, 8, 3)
13737 __STRUCTN (poly, 16, 3)
13738 /* 4-element structs. */
13739 __STRUCTN (int, 8, 4)
13740 __STRUCTN (int, 64, 4)
13741 __STRUCTN (uint, 8, 4)
13742 __STRUCTN (uint, 64, 4)
13743 __STRUCTN (poly, 8, 4)
13744 __STRUCTN (float, 64, 4)
13745 #undef __STRUCTN
/* __LD2R_FUNC: generate vld2{Q}_dup_<funcsuffix> (const ptrtype *ptr).
   Loads two consecutive elements from PTR and replicates each across all
   lanes of the corresponding result vector, using LD2R.  The asm
   hard-codes v16/v17 as scratch (hence the clobbers) and stores the pair
   back through the "=Q" memory output to build the returned structure.
   STRUCTTYPE is only a sizing type for the input memory operand — where
   no __STRUCTN type was created above, an existing vector type of equal
   size is reused (e.g. float64x2_t for the f64 variants).  */
13747 #define __LD2R_FUNC(rettype, structtype, ptrtype, \
13748 regsuffix, funcsuffix, Q) \
13749 __extension__ static __inline rettype \
13750 __attribute__ ((__always_inline__)) \
13751 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
13753 rettype result; \
13754 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
13755 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
13756 : "=Q"(result) \
13757 : "Q"(*(const structtype *)ptr) \
13758 : "memory", "v16", "v17"); \
13759 return result; \
13762 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
13763 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
13764 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
13765 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
13766 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
13767 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
13768 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
13769 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
13770 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
13771 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
13772 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
13773 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
13774 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
13775 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
13776 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
13777 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
13778 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
13779 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
13780 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
13781 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
13782 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
13783 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
13784 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
13785 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
/* __LD2_LANE_FUNC: generate vld2{Q}_lane_<funcsuffix> (ptr, b, c).
   Returns B with lane C of each of its two vectors replaced by two
   consecutive elements loaded from PTR.  The asm first materialises B in
   the fixed scratch registers v16/v17 (LD1), overwrites lane %3 (LD2),
   then spills back through the "=Q" memory output.  C must be a
   compile-time constant (the "i" constraint).  */
13787 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
13788 lnsuffix, funcsuffix, Q) \
13789 __extension__ static __inline rettype \
13790 __attribute__ ((__always_inline__)) \
13791 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
13792 rettype b, const int c) \
13794 rettype result; \
13795 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
13796 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
13797 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
13798 : "=Q"(result) \
13799 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
13800 : "memory", "v16", "v17"); \
13801 return result; \
/* NOTE(review): the s8 variant below is instantiated with a uint8_t
   pointer, unlike every other variant whose pointer type matches the
   element type — presumably this should be int8_t (ACLE declares
   vld2_lane_s8 with const int8_t *); verify against callers.  */
13804 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
13805 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
13806 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
13807 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
13808 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
13809 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
13810 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
13811 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
13812 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
13813 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
13814 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
13815 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
13816 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
13817 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
13818 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
13819 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
13820 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
13821 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
13822 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
13823 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
13824 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
13825 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
13826 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
13827 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* __LD3R_FUNC: generate vld3{Q}_dup_<funcsuffix> (const ptrtype *ptr).
   Same scheme as __LD2R_FUNC, but loads three consecutive elements with
   LD3R and uses the fixed scratch register range v16-v18.  */
13829 #define __LD3R_FUNC(rettype, structtype, ptrtype, \
13830 regsuffix, funcsuffix, Q) \
13831 __extension__ static __inline rettype \
13832 __attribute__ ((__always_inline__)) \
13833 vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
13835 rettype result; \
13836 __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
13837 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
13838 : "=Q"(result) \
13839 : "Q"(*(const structtype *)ptr) \
13840 : "memory", "v16", "v17", "v18"); \
13841 return result; \
13844 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
13845 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
13846 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
13847 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
13848 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
13849 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
13850 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
13851 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
13852 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
13853 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
13854 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
13855 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
13856 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
13857 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
13858 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
13859 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
13860 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
13861 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
13862 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
13863 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
13864 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
13865 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
13866 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
13867 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
/* __LD3_LANE_FUNC: generate vld3{Q}_lane_<funcsuffix> (ptr, b, c).
   Same scheme as __LD2_LANE_FUNC, but for three vectors using the fixed
   scratch register range v16-v18.  C must be a compile-time constant.  */
13869 #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
13870 lnsuffix, funcsuffix, Q) \
13871 __extension__ static __inline rettype \
13872 __attribute__ ((__always_inline__)) \
13873 vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
13874 rettype b, const int c) \
13876 rettype result; \
13877 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
13878 "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
13879 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
13880 : "=Q"(result) \
13881 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
13882 : "memory", "v16", "v17", "v18"); \
13883 return result; \
/* NOTE(review): as with __LD2_LANE_FUNC, the s8 variant below takes a
   uint8_t pointer where ACLE specifies int8_t — verify.  */
13886 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
13887 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
13888 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
13889 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
13890 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
13891 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
13892 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
13893 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
13894 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
13895 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
13896 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
13897 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
13898 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
13899 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
13900 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
13901 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
13902 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
13903 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
13904 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
13905 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
13906 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
13907 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
13908 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
13909 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* __LD4R_FUNC: generate vld4{Q}_dup_<funcsuffix> (const ptrtype *ptr).
   Same scheme as __LD2R_FUNC, but loads four consecutive elements with
   LD4R and uses the fixed scratch register range v16-v19.  */
13911 #define __LD4R_FUNC(rettype, structtype, ptrtype, \
13912 regsuffix, funcsuffix, Q) \
13913 __extension__ static __inline rettype \
13914 __attribute__ ((__always_inline__)) \
13915 vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
13917 rettype result; \
13918 __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
13919 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
13920 : "=Q"(result) \
13921 : "Q"(*(const structtype *)ptr) \
13922 : "memory", "v16", "v17", "v18", "v19"); \
13923 return result; \
13926 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
13927 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
13928 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
13929 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
13930 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
13931 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
13932 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
13933 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
13934 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
13935 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
13936 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
13937 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
13938 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
13939 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
13940 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
13941 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
13942 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
13943 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
13944 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
13945 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
13946 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
13947 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
13948 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
13949 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* __LD4_LANE_FUNC: generate vld4{Q}_lane_<funcsuffix> (ptr, b, c).
   Same scheme as __LD2_LANE_FUNC, but for four vectors using the fixed
   scratch register range v16-v19.  C must be a compile-time constant.  */
13951 #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
13952 lnsuffix, funcsuffix, Q) \
13953 __extension__ static __inline rettype \
13954 __attribute__ ((__always_inline__)) \
13955 vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
13956 rettype b, const int c) \
13958 rettype result; \
13959 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
13960 "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
13961 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
13962 : "=Q"(result) \
13963 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
13964 : "memory", "v16", "v17", "v18", "v19"); \
13965 return result; \
/* NOTE(review): as with __LD2_LANE_FUNC, the s8 variant below takes a
   uint8_t pointer where ACLE specifies int8_t — verify.  */
13968 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
13969 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
13970 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
13971 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
13972 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
13973 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
13974 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
13975 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
13976 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
13977 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
13978 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
13979 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
13980 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
13981 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
13982 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
13983 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
13984 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
13985 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
13986 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
13987 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
13988 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
13989 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
13990 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
13991 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* __ST2_LANE_FUNC (D-register forms): generate
   vst2_lane_<funcsuffix> (ptr, b, c), storing lane C of each of B's two
   64-bit vectors to consecutive elements at PTR.  Each D-register value
   is widened to a Q register by combining with a zero upper half
   (vcombine_*/vcreate_*), packed into the opaque two-Q-register tuple
   type __builtin_aarch64_simd_oi via set_qregoi, then handed to the
   st2_lane builtin.  SIGNEDTYPE is the 128-bit vector type the builtin's
   register-setter expects, hence the casts.  */
13993 #define __ST2_LANE_FUNC(intype, largetype, ptrtype, \
13994 mode, ptr_mode, funcsuffix, signedtype) \
13995 __extension__ static __inline void \
13996 __attribute__ ((__always_inline__)) \
13997 vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
13998 intype __b, const int __c) \
14000 __builtin_aarch64_simd_oi __o; \
14001 largetype __temp; \
14002 __temp.val[0] \
14003 = vcombine_##funcsuffix (__b.val[0], \
14004 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14005 __temp.val[1] \
14006 = vcombine_##funcsuffix (__b.val[1], \
14007 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14008 __o = __builtin_aarch64_set_qregoi##mode (__o, \
14009 (signedtype) __temp.val[0], 0); \
14010 __o = __builtin_aarch64_set_qregoi##mode (__o, \
14011 (signedtype) __temp.val[1], 1); \
14012 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
14013 __ptr, __o, __c); \
14016 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
14017 float32x4_t)
14018 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
14019 float64x2_t)
14020 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
14021 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
14022 int16x8_t)
14023 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
14024 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
14025 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
14026 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
14027 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
14028 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
14029 int16x8_t)
14030 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
14031 int32x4_t)
14032 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
14033 int64x2_t)
/* Redefine __ST2_LANE_FUNC for the Q-register forms: generate
   vst2q_lane_<funcsuffix> (ptr, b, c).  Here no widening is needed — the
   two 128-bit vectors are reinterpreted as the builtin's opaque register
   tuple via a union pun, then passed to the st2_lane builtin.  */
14035 #undef __ST2_LANE_FUNC
14036 #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
14037 __extension__ static __inline void \
14038 __attribute__ ((__always_inline__)) \
14039 vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
14040 intype __b, const int __c) \
14042 union { intype __i; \
14043 __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
14044 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
14045 __ptr, __temp.__o, __c); \
14048 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
14049 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
14050 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
14051 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
14052 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
14053 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
14054 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
14055 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
14056 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
14057 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
14058 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
14059 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
/* __ST3_LANE_FUNC (D-register forms): generate
   vst3_lane_<funcsuffix> (ptr, b, c).  Same scheme as the D-register
   __ST2_LANE_FUNC, but three vectors packed into the opaque
   three-Q-register tuple __builtin_aarch64_simd_ci via set_qregci.  */
14061 #define __ST3_LANE_FUNC(intype, largetype, ptrtype, \
14062 mode, ptr_mode, funcsuffix, signedtype) \
14063 __extension__ static __inline void \
14064 __attribute__ ((__always_inline__)) \
14065 vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
14066 intype __b, const int __c) \
14068 __builtin_aarch64_simd_ci __o; \
14069 largetype __temp; \
14070 __temp.val[0] \
14071 = vcombine_##funcsuffix (__b.val[0], \
14072 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14073 __temp.val[1] \
14074 = vcombine_##funcsuffix (__b.val[1], \
14075 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14076 __temp.val[2] \
14077 = vcombine_##funcsuffix (__b.val[2], \
14078 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14079 __o = __builtin_aarch64_set_qregci##mode (__o, \
14080 (signedtype) __temp.val[0], 0); \
14081 __o = __builtin_aarch64_set_qregci##mode (__o, \
14082 (signedtype) __temp.val[1], 1); \
14083 __o = __builtin_aarch64_set_qregci##mode (__o, \
14084 (signedtype) __temp.val[2], 2); \
14085 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
14086 __ptr, __o, __c); \
14089 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
14090 float32x4_t)
14091 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
14092 float64x2_t)
14093 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
14094 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
14095 int16x8_t)
14096 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
14097 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
14098 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
14099 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
14100 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
14101 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
14102 int16x8_t)
14103 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
14104 int32x4_t)
14105 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
14106 int64x2_t)
/* Redefine __ST3_LANE_FUNC for the Q-register forms: generate
   vst3q_lane_<funcsuffix> (ptr, b, c) via a union pun into the opaque
   three-register tuple type, as in the Q-register __ST2_LANE_FUNC.  */
14108 #undef __ST3_LANE_FUNC
14109 #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
14110 __extension__ static __inline void \
14111 __attribute__ ((__always_inline__)) \
14112 vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
14113 intype __b, const int __c) \
14115 union { intype __i; \
14116 __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
14117 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
14118 __ptr, __temp.__o, __c); \
14121 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
14122 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
14123 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
14124 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
14125 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
14126 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
14127 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
14128 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
14129 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
14130 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
14131 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
14132 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
/* __ST4_LANE_FUNC (D-register forms): generate
   vst4_lane_<funcsuffix> (ptr, b, c).  Same scheme as the D-register
   __ST2_LANE_FUNC, but four vectors packed into the opaque
   four-Q-register tuple __builtin_aarch64_simd_xi via set_qregxi.  */
14134 #define __ST4_LANE_FUNC(intype, largetype, ptrtype, \
14135 mode, ptr_mode, funcsuffix, signedtype) \
14136 __extension__ static __inline void \
14137 __attribute__ ((__always_inline__)) \
14138 vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
14139 intype __b, const int __c) \
14141 __builtin_aarch64_simd_xi __o; \
14142 largetype __temp; \
14143 __temp.val[0] \
14144 = vcombine_##funcsuffix (__b.val[0], \
14145 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14146 __temp.val[1] \
14147 = vcombine_##funcsuffix (__b.val[1], \
14148 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14149 __temp.val[2] \
14150 = vcombine_##funcsuffix (__b.val[2], \
14151 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14152 __temp.val[3] \
14153 = vcombine_##funcsuffix (__b.val[3], \
14154 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
14155 __o = __builtin_aarch64_set_qregxi##mode (__o, \
14156 (signedtype) __temp.val[0], 0); \
14157 __o = __builtin_aarch64_set_qregxi##mode (__o, \
14158 (signedtype) __temp.val[1], 1); \
14159 __o = __builtin_aarch64_set_qregxi##mode (__o, \
14160 (signedtype) __temp.val[2], 2); \
14161 __o = __builtin_aarch64_set_qregxi##mode (__o, \
14162 (signedtype) __temp.val[3], 3); \
14163 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
14164 __ptr, __o, __c); \
14167 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
14168 float32x4_t)
14169 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
14170 float64x2_t)
14171 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
14172 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
14173 int16x8_t)
14174 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
14175 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
14176 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
14177 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
14178 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
14179 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
14180 int16x8_t)
14181 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
14182 int32x4_t)
14183 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
14184 int64x2_t)
/* Redefine __ST4_LANE_FUNC for the Q-register forms: generate
   vst4q_lane_<funcsuffix> (ptr, b, c) via a union pun into the opaque
   four-register tuple type, as in the Q-register __ST2_LANE_FUNC.  */
14186 #undef __ST4_LANE_FUNC
14187 #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
14188 __extension__ static __inline void \
14189 __attribute__ ((__always_inline__)) \
14190 vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
14191 intype __b, const int __c) \
14193 union { intype __i; \
14194 __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
14195 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
14196 __ptr, __temp.__o, __c); \
14199 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
14200 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
14201 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
14202 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
14203 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
14204 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
14205 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
14206 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
14207 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
14208 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
14209 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
14210 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
/* vaddlv_s32: widening add across the two signed 32-bit lanes of A,
   returning a[0] + a[1] as a 64-bit result (SADDLP pairwise add; the
   result stays in a SIMD register via the "w" constraint).  */
14212 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14213 vaddlv_s32 (int32x2_t a)
14215 int64_t result;
14216 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14217 return result;
/* vaddlv_u32: unsigned counterpart of vaddlv_s32 — widening pairwise add
   of A's two 32-bit lanes to a 64-bit result via UADDLP.  */
14220 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14221 vaddlv_u32 (uint32x2_t a)
14223 uint64_t result;
14224 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14225 return result;
/* vpaddd_s64: pairwise add of the two 64-bit lanes of __a
   (a[0] + a[1]), implemented with the addp builtin.  */
14228 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14229 vpaddd_s64 (int64x2_t __a)
14231 return __builtin_aarch64_addpdi (__a);
/* Saturating doubling multiply-high by a lane of a 128-bit vector:
   vqdmulh{q}_laneq_* multiply each lane of __a by lane __c of __b,
   double, and return the high half with saturation (SQDMULH); the
   vqrdmulh{q}_laneq_* forms additionally round (SQRDMULH).  __c must be
   a constant lane index into the 128-bit __b.  All forward directly to
   the corresponding compiler builtins.  */
14234 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14235 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14237 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
14240 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14241 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14243 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
14246 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14247 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14249 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
14252 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14253 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14255 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
14258 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14259 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14261 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
14264 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14265 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14267 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
14270 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14271 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14273 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
14276 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14277 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14279 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
14282 /* Table intrinsics. */
/* vqtbl1_{p8,s8,u8}: 8-byte table lookup into the single 16-byte table
   A using the index bytes of B (TBL instruction; per the A64 ISA,
   out-of-range index bytes select zero).  */
14284 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14285 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
14287 poly8x8_t result;
14288 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14289 : "=w"(result)
14290 : "w"(a), "w"(b)
14291 : /* No clobbers */);
14292 return result;
14295 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14296 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
14298 int8x8_t result;
14299 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14300 : "=w"(result)
14301 : "w"(a), "w"(b)
14302 : /* No clobbers */);
14303 return result;
14306 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14307 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
14309 uint8x8_t result;
14310 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14311 : "=w"(result)
14312 : "w"(a), "w"(b)
14313 : /* No clobbers */);
14314 return result;
/* vqtbl1q_{p8,s8,u8}: 16-byte variants of vqtbl1_* — full-width TBL
   lookup into the single 16-byte table A with 16 index bytes in B.  */
14317 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14318 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
14320 poly8x16_t result;
14321 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14322 : "=w"(result)
14323 : "w"(a), "w"(b)
14324 : /* No clobbers */);
14325 return result;
14328 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14329 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
14331 int8x16_t result;
14332 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14333 : "=w"(result)
14334 : "w"(a), "w"(b)
14335 : /* No clobbers */);
14336 return result;
14339 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14340 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
14342 uint8x16_t result;
14343 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14344 : "=w"(result)
14345 : "w"(a), "w"(b)
14346 : /* No clobbers */);
14347 return result;
/* vqtbl2{,q}_{s8,u8,p8}: TBL lookup into a two-register (32-byte) table.
   TBL requires the table in consecutive registers, so the asm first
   loads TAB from memory into the fixed pair v16/v17 (hence the "Q"
   memory input and register clobbers), then indexes it with IDX.  */
14350 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14351 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
14353 int8x8_t result;
14354 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14355 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14356 :"=w"(result)
14357 :"Q"(tab),"w"(idx)
14358 :"memory", "v16", "v17");
14359 return result;
14362 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14363 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
14365 uint8x8_t result;
14366 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14367 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14368 :"=w"(result)
14369 :"Q"(tab),"w"(idx)
14370 :"memory", "v16", "v17");
14371 return result;
14374 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14375 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
14377 poly8x8_t result;
14378 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14379 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14380 :"=w"(result)
14381 :"Q"(tab),"w"(idx)
14382 :"memory", "v16", "v17");
14383 return result;
14386 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14387 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
14389 int8x16_t result;
14390 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14391 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14392 :"=w"(result)
14393 :"Q"(tab),"w"(idx)
14394 :"memory", "v16", "v17");
14395 return result;
14398 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14399 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
14401 uint8x16_t result;
14402 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14403 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14404 :"=w"(result)
14405 :"Q"(tab),"w"(idx)
14406 :"memory", "v16", "v17");
14407 return result;
14410 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14411 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
14413 poly8x16_t result;
14414 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14415 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14416 :"=w"(result)
14417 :"Q"(tab),"w"(idx)
14418 :"memory", "v16", "v17");
14419 return result;
/* vqtbl3{,q}_{s8,u8,p8}: TBL lookup into a three-register (48-byte)
   table, staged through the fixed register range v16-v18 as in the
   vqtbl2 variants.  */
14422 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14423 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
14425 int8x8_t result;
14426 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14427 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14428 :"=w"(result)
14429 :"Q"(tab),"w"(idx)
14430 :"memory", "v16", "v17", "v18");
14431 return result;
14434 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14435 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
14437 uint8x8_t result;
14438 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14439 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14440 :"=w"(result)
14441 :"Q"(tab),"w"(idx)
14442 :"memory", "v16", "v17", "v18");
14443 return result;
14446 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14447 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
14449 poly8x8_t result;
14450 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14451 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14452 :"=w"(result)
14453 :"Q"(tab),"w"(idx)
14454 :"memory", "v16", "v17", "v18");
14455 return result;
14458 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14459 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
14461 int8x16_t result;
14462 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14463 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14464 :"=w"(result)
14465 :"Q"(tab),"w"(idx)
14466 :"memory", "v16", "v17", "v18");
14467 return result;
14470 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14471 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
14473 uint8x16_t result;
14474 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14475 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14476 :"=w"(result)
14477 :"Q"(tab),"w"(idx)
14478 :"memory", "v16", "v17", "v18");
14479 return result;
14482 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14483 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
14485 poly8x16_t result;
14486 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14487 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14488 :"=w"(result)
14489 :"Q"(tab),"w"(idx)
14490 :"memory", "v16", "v17", "v18");
14491 return result;
/* vqtbl4_{s8,u8,p8}: 8-byte-result TBL lookup into a four-register
   (64-byte) table, staged through the fixed register range v16-v19.  */
14494 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14495 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
14497 int8x8_t result;
14498 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14499 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14500 :"=w"(result)
14501 :"Q"(tab),"w"(idx)
14502 :"memory", "v16", "v17", "v18", "v19");
14503 return result;
14506 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14507 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
14509 uint8x8_t result;
14510 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14511 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14512 :"=w"(result)
14513 :"Q"(tab),"w"(idx)
14514 :"memory", "v16", "v17", "v18", "v19");
14515 return result;
14518 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14519 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
14521 poly8x8_t result;
14522 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14523 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14524 :"=w"(result)
14525 :"Q"(tab),"w"(idx)
14526 :"memory", "v16", "v17", "v18", "v19");
14527 return result;
14531 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14532 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
14534 int8x16_t result;
14535 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14536 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14537 :"=w"(result)
14538 :"Q"(tab),"w"(idx)
14539 :"memory", "v16", "v17", "v18", "v19");
14540 return result;
14543 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14544 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
14546 uint8x16_t result;
14547 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14548 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14549 :"=w"(result)
14550 :"Q"(tab),"w"(idx)
14551 :"memory", "v16", "v17", "v18", "v19");
14552 return result;
14555 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14556 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
14558 poly8x16_t result;
14559 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14560 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14561 :"=w"(result)
14562 :"Q"(tab),"w"(idx)
14563 :"memory", "v16", "v17", "v18", "v19");
14564 return result;
/* vqtbx1 family: byte lookup with "extend" semantics into one 128-bit
   table.  result starts as r and is tied to the asm via "+w", so TBX
   leaves lanes with out-of-range indices holding the corresponding byte
   of r instead of zeroing them.  Single-register table, so no fixed-reg
   load and no clobbers.  */
14568 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14569 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
14571 int8x8_t result = r;
14572 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14573 : "+w"(result)
14574 : "w"(tab), "w"(idx)
14575 : /* No clobbers */);
14576 return result;
/* vqtbx1_u8: 8-lane unsigned variant.  */
14579 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14580 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
14582 uint8x8_t result = r;
14583 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14584 : "+w"(result)
14585 : "w"(tab), "w"(idx)
14586 : /* No clobbers */);
14587 return result;
/* vqtbx1_p8: 8-lane poly8 variant.  */
14590 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14591 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
14593 poly8x8_t result = r;
14594 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14595 : "+w"(result)
14596 : "w"(tab), "w"(idx)
14597 : /* No clobbers */);
14598 return result;
/* vqtbx1q_s8: 16-lane signed variant.  */
14601 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14602 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
14604 int8x16_t result = r;
14605 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14606 : "+w"(result)
14607 : "w"(tab), "w"(idx)
14608 : /* No clobbers */);
14609 return result;
/* vqtbx1q_u8: 16-lane unsigned variant.  */
14612 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14613 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
14615 uint8x16_t result = r;
14616 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14617 : "+w"(result)
14618 : "w"(tab), "w"(idx)
14619 : /* No clobbers */);
14620 return result;
/* vqtbx1q_p8: 16-lane poly8 variant.  */
14623 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14624 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
14626 poly8x16_t result = r;
14627 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14628 : "+w"(result)
14629 : "w"(tab), "w"(idx)
14630 : /* No clobbers */);
14631 return result;
/* vqtbx2 family: TBX lookup into a 2 x 128-bit table loaded into the
   fixed registers v16-v17.  "+w"(result) seeds the destination with r;
   out-of-range index lanes keep r's bytes.  */
14634 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14635 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
14637 int8x8_t result = r;
14638 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14639 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14640 :"+w"(result)
14641 :"Q"(tab),"w"(idx)
14642 :"memory", "v16", "v17");
14643 return result;
/* vqtbx2_u8: 8-lane unsigned variant.  */
14646 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14647 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
14649 uint8x8_t result = r;
14650 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14651 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14652 :"+w"(result)
14653 :"Q"(tab),"w"(idx)
14654 :"memory", "v16", "v17");
14655 return result;
/* vqtbx2_p8: 8-lane poly8 variant.  */
14658 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14659 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
14661 poly8x8_t result = r;
14662 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14663 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14664 :"+w"(result)
14665 :"Q"(tab),"w"(idx)
14666 :"memory", "v16", "v17");
14667 return result;
/* vqtbx2q_s8: 16-lane signed variant.  */
14671 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14672 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
14674 int8x16_t result = r;
14675 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14676 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14677 :"+w"(result)
14678 :"Q"(tab),"w"(idx)
14679 :"memory", "v16", "v17");
14680 return result;
/* vqtbx2q_u8: 16-lane unsigned variant.  */
14683 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14684 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
14686 uint8x16_t result = r;
14687 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14688 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14689 :"+w"(result)
14690 :"Q"(tab),"w"(idx)
14691 :"memory", "v16", "v17");
14692 return result;
/* vqtbx2q_p8: 16-lane poly8 variant.  */
14695 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14696 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
14698 poly8x16_t result = r;
14699 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14700 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14701 :"+w"(result)
14702 :"Q"(tab),"w"(idx)
14703 :"memory", "v16", "v17");
14704 return result;
/* vqtbx3 family: TBX lookup into a 3 x 128-bit table in v16-v18.
   Out-of-range index lanes keep the corresponding byte of r.  */
14708 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14709 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
14711 int8x8_t result = r;
14712 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14713 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14714 :"+w"(result)
14715 :"Q"(tab),"w"(idx)
14716 :"memory", "v16", "v17", "v18");
14717 return result;
/* vqtbx3_u8: 8-lane unsigned variant.  */
14720 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14721 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
14723 uint8x8_t result = r;
14724 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14725 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14726 :"+w"(result)
14727 :"Q"(tab),"w"(idx)
14728 :"memory", "v16", "v17", "v18");
14729 return result;
/* vqtbx3_p8: 8-lane poly8 variant.  */
14732 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14733 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
14735 poly8x8_t result = r;
14736 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14737 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14738 :"+w"(result)
14739 :"Q"(tab),"w"(idx)
14740 :"memory", "v16", "v17", "v18");
14741 return result;
/* vqtbx3q_s8: 16-lane signed variant.  */
14745 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14746 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
14748 int8x16_t result = r;
14749 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14750 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14751 :"+w"(result)
14752 :"Q"(tab),"w"(idx)
14753 :"memory", "v16", "v17", "v18");
14754 return result;
/* vqtbx3q_u8: 16-lane unsigned variant.  */
14757 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14758 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
14760 uint8x16_t result = r;
14761 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14762 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14763 :"+w"(result)
14764 :"Q"(tab),"w"(idx)
14765 :"memory", "v16", "v17", "v18");
14766 return result;
/* vqtbx3q_p8: 16-lane poly8 variant.  */
14769 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14770 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
14772 poly8x16_t result = r;
14773 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14774 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14775 :"+w"(result)
14776 :"Q"(tab),"w"(idx)
14777 :"memory", "v16", "v17", "v18");
14778 return result;
/* vqtbx4 family: TBX lookup into a 4 x 128-bit table in v16-v19.
   Out-of-range index lanes keep the corresponding byte of r.  */
14782 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14783 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
14785 int8x8_t result = r;
14786 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14787 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14788 :"+w"(result)
14789 :"Q"(tab),"w"(idx)
14790 :"memory", "v16", "v17", "v18", "v19");
14791 return result;
/* vqtbx4_u8: 8-lane unsigned variant.  */
14794 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14795 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
14797 uint8x8_t result = r;
14798 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14799 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14800 :"+w"(result)
14801 :"Q"(tab),"w"(idx)
14802 :"memory", "v16", "v17", "v18", "v19");
14803 return result;
/* vqtbx4_p8: 8-lane poly8 variant.  */
14806 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14807 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
14809 poly8x8_t result = r;
14810 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14811 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14812 :"+w"(result)
14813 :"Q"(tab),"w"(idx)
14814 :"memory", "v16", "v17", "v18", "v19");
14815 return result;
/* vqtbx4q_s8: 16-lane signed variant.  */
14819 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14820 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
14822 int8x16_t result = r;
14823 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14824 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14825 :"+w"(result)
14826 :"Q"(tab),"w"(idx)
14827 :"memory", "v16", "v17", "v18", "v19");
14828 return result;
/* vqtbx4q_u8: 16-lane unsigned variant.  */
14831 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14832 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
14834 uint8x16_t result = r;
14835 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14836 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14837 :"+w"(result)
14838 :"Q"(tab),"w"(idx)
14839 :"memory", "v16", "v17", "v18", "v19");
14840 return result;
/* vqtbx4q_p8: 16-lane poly8 variant.  */
14843 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14844 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
14846 poly8x16_t result = r;
14847 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14848 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14849 :"+w"(result)
14850 :"Q"(tab),"w"(idx)
14851 :"memory", "v16", "v17", "v18", "v19");
14852 return result;
14855 /* V7 legacy table intrinsics. */
/* vtbl1 family (ARMv7 compatibility): the 64-bit table is widened to a
   128-bit one by appending a zero half with vcombine/vcreate, then a
   single-register TBL is used.  Indices 8..15 hit the zero half and
   indices >= 16 are zeroed by TBL itself, matching V7 VTBL semantics
   (any index >= 8 yields 0).  */
14857 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14858 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
14860 int8x8_t result;
14861 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
14862 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14863 : "=w"(result)
14864 : "w"(temp), "w"(idx)
14865 : /* No clobbers */);
14866 return result;
/* vtbl1_u8: unsigned variant.  */
14869 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14870 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
14872 uint8x8_t result;
14873 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
14874 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14875 : "=w"(result)
14876 : "w"(temp), "w"(idx)
14877 : /* No clobbers */);
14878 return result;
/* vtbl1_p8: poly8 variant.  */
14881 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14882 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
14884 poly8x8_t result;
14885 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
14886 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14887 : "=w"(result)
14888 : "w"(temp), "w"(idx)
14889 : /* No clobbers */);
14890 return result;
/* vtbl2 family (ARMv7 compatibility): the two 64-bit halves are combined
   into one 128-bit table, so the 16 valid V7 indices map exactly onto one
   TBL register; larger indices are zeroed by TBL, as V7 requires.  */
14893 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14894 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
14896 int8x8_t result;
14897 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
14898 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14899 : "=w"(result)
14900 : "w"(temp), "w"(idx)
14901 : /* No clobbers */);
14902 return result;
/* vtbl2_u8: unsigned variant.  */
14905 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14906 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
14908 uint8x8_t result;
14909 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
14910 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14911 : "=w"(result)
14912 : "w"(temp), "w"(idx)
14913 : /* No clobbers */);
14914 return result;
/* vtbl2_p8: poly8 variant.  */
14917 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14918 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
14920 poly8x8_t result;
14921 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
14922 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14923 : "=w"(result)
14924 : "w"(temp), "w"(idx)
14925 : /* No clobbers */);
14926 return result;
/* vtbl3 family (ARMv7 compatibility): the three 64-bit table parts are
   repacked into a 2 x 128-bit table, the last half zero-padded.  Indices
   24..31 hit the zero pad and indices >= 32 are zeroed by TBL, so all
   out-of-range V7 indices (>= 24) correctly produce 0.  The repacked
   table is passed through memory and loaded into v16-v17 (clobbered).  */
14929 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14930 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
14932 int8x8_t result;
14933 int8x16x2_t temp;
14934 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
14935 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
14936 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
14937 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
14938 : "=w"(result)
14939 : "Q"(temp), "w"(idx)
14940 : "v16", "v17", "memory");
14941 return result;
/* vtbl3_u8: unsigned variant.  */
14944 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14945 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
14947 uint8x8_t result;
14948 uint8x16x2_t temp;
14949 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
14950 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
14951 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
14952 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
14953 : "=w"(result)
14954 : "Q"(temp), "w"(idx)
14955 : "v16", "v17", "memory");
14956 return result;
/* vtbl3_p8: poly8 variant.  */
14959 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14960 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
14962 poly8x8_t result;
14963 poly8x16x2_t temp;
14964 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
14965 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
14966 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
14967 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
14968 : "=w"(result)
14969 : "Q"(temp), "w"(idx)
14970 : "v16", "v17", "memory");
14971 return result;
/* vtbl4 family (ARMv7 compatibility): the four 64-bit table parts are
   repacked into a full 2 x 128-bit (32-entry) table; TBL zeroes indices
   >= 32, matching V7 semantics.  Table loaded into v16-v17 (clobbered).  */
14974 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14975 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
14977 int8x8_t result;
14978 int8x16x2_t temp;
14979 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
14980 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
14981 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
14982 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
14983 : "=w"(result)
14984 : "Q"(temp), "w"(idx)
14985 : "v16", "v17", "memory");
14986 return result;
/* vtbl4_u8: unsigned variant.  */
14989 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14990 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
14992 uint8x8_t result;
14993 uint8x16x2_t temp;
14994 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
14995 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
14996 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
14997 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
14998 : "=w"(result)
14999 : "Q"(temp), "w"(idx)
15000 : "v16", "v17", "memory");
15001 return result;
/* vtbl4_p8: poly8 variant.  */
15004 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15005 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
15007 poly8x8_t result;
15008 poly8x16x2_t temp;
15009 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15010 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15011 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15012 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15013 : "=w"(result)
15014 : "Q"(temp), "w"(idx)
15015 : "v16", "v17", "memory");
15016 return result;
/* vtbx2 family (ARMv7 compatibility): the two 64-bit halves form one
   16-entry TBX table, the exact V7 index range, so TBX's fall-through to
   r for out-of-range indices matches V7 VTBX2 directly.  */
15019 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15020 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
15022 int8x8_t result = r;
15023 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15024 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15025 : "+w"(result)
15026 : "w"(temp), "w"(idx)
15027 : /* No clobbers */);
15028 return result;
/* vtbx2_u8: unsigned variant.  */
15031 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15032 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
15034 uint8x8_t result = r;
15035 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15036 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15037 : "+w"(result)
15038 : "w"(temp), "w"(idx)
15039 : /* No clobbers */);
15040 return result;
/* vtbx2_p8: poly8 variant.  */
15043 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15044 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
15046 poly8x8_t result = r;
15047 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15048 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15049 : "+w"(result)
15050 : "w"(temp), "w"(idx)
15051 : /* No clobbers */);
15052 return result;
/* vtbx4 family (ARMv7 compatibility): the four 64-bit parts form one
   32-entry TBX table in v16-v17 — again the exact V7 index range, so
   TBX's preserve-destination behaviour for indices >= 32 matches V7.  */
15055 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15056 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
15058 int8x8_t result = r;
15059 int8x16x2_t temp;
15060 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15061 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15062 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15063 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15064 : "+w"(result)
15065 : "Q"(temp), "w"(idx)
15066 : "v16", "v17", "memory");
15067 return result;
/* vtbx4_u8: unsigned variant.  */
15070 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15071 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
15073 uint8x8_t result = r;
15074 uint8x16x2_t temp;
15075 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15076 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15077 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15078 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15079 : "+w"(result)
15080 : "Q"(temp), "w"(idx)
15081 : "v16", "v17", "memory");
15082 return result;
/* vtbx4_p8: poly8 variant.  */
15085 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15086 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
15088 poly8x8_t result = r;
15089 poly8x16x2_t temp;
15090 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15091 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15092 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15093 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15094 : "+w"(result)
15095 : "Q"(temp), "w"(idx)
15096 : "v16", "v17", "memory");
15097 return result;
15100 /* End of temporary inline asm. */
15102 /* Start of optimal implementations in approved order. */
15104 /* vabs */
/* vabs family: lane-wise absolute value via the aarch64 abs builtins.
   The _f64 and _s64 64x1 variants use scalar builtins because float64x1_t
   and int64x1_t are plain scalar typedefs in this header (see the type
   definitions at the top of the file).  */
15106 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15107 vabs_f32 (float32x2_t __a)
15109 return __builtin_aarch64_absv2sf (__a);
/* vabs_f64: scalar fabs, since float64x1_t is a double typedef.  */
15112 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15113 vabs_f64 (float64x1_t __a)
15115 return __builtin_fabs (__a);
/* vabs_s8: 8 x signed byte absolute value.  */
15118 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15119 vabs_s8 (int8x8_t __a)
15121 return __builtin_aarch64_absv8qi (__a);
/* vabs_s16: 4 x signed halfword absolute value.  */
15124 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15125 vabs_s16 (int16x4_t __a)
15127 return __builtin_aarch64_absv4hi (__a);
/* vabs_s32: 2 x signed word absolute value.  */
15130 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15131 vabs_s32 (int32x2_t __a)
15133 return __builtin_aarch64_absv2si (__a);
/* vabs_s64: scalar llabs.  NOTE(review): llabs (INT64_MIN) is undefined
   in C, whereas the hardware ABS instruction wraps — confirm intended
   behaviour for the INT64_MIN input.  */
15136 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15137 vabs_s64 (int64x1_t __a)
15139 return __builtin_llabs (__a);
/* vabsq_f32: 4 x float absolute value.  */
15142 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15143 vabsq_f32 (float32x4_t __a)
15145 return __builtin_aarch64_absv4sf (__a);
/* vabsq_f64: 2 x double absolute value.  */
15148 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15149 vabsq_f64 (float64x2_t __a)
15151 return __builtin_aarch64_absv2df (__a);
/* vabsq_s8: 16 x signed byte absolute value.  */
15154 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15155 vabsq_s8 (int8x16_t __a)
15157 return __builtin_aarch64_absv16qi (__a);
/* vabsq_s16: 8 x signed halfword absolute value.  */
15160 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15161 vabsq_s16 (int16x8_t __a)
15163 return __builtin_aarch64_absv8hi (__a);
/* vabsq_s32: 4 x signed word absolute value.  */
15166 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15167 vabsq_s32 (int32x4_t __a)
15169 return __builtin_aarch64_absv4si (__a);
/* vabsq_s64: 2 x signed doubleword absolute value.  */
15172 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15173 vabsq_s64 (int64x2_t __a)
15175 return __builtin_aarch64_absv2di (__a);
15178 /* vadd */
/* vaddd_s64: scalar doubleword add — int64x1_t is a scalar typedef here,
   so plain C addition suffices.  Wrap-around on overflow is the intended
   NEON semantics; signed + is used directly.  */
15180 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15181 vaddd_s64 (int64x1_t __a, int64x1_t __b)
15183 return __a + __b;
/* vaddd_u64: unsigned scalar doubleword add (modular).  */
15186 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15187 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
15189 return __a + __b;
15192 /* vaddv */
/* vaddv family: across-vector add reduction.  The reduc_*plus builtins
   return a vector with the sum in lane 0, which vget(q)_lane extracts.
   Unsigned variants cast through the signed builtin types and back —
   bit-identical for modular addition.  */
15194 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15195 vaddv_s8 (int8x8_t __a)
15197 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
/* vaddv_s16: sum of 4 signed halfwords.  */
15200 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15201 vaddv_s16 (int16x4_t __a)
15203 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
/* vaddv_s32: sum of 2 signed words.  */
15206 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15207 vaddv_s32 (int32x2_t __a)
15209 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
/* vaddv_u8: sum of 8 unsigned bytes (via the signed builtin, recast).  */
15212 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15213 vaddv_u8 (uint8x8_t __a)
15215 return vget_lane_u8 ((uint8x8_t)
15216 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
/* vaddv_u16: sum of 4 unsigned halfwords.  */
15220 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15221 vaddv_u16 (uint16x4_t __a)
15223 return vget_lane_u16 ((uint16x4_t)
15224 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
/* vaddv_u32: sum of 2 unsigned words.  */
15228 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15229 vaddv_u32 (uint32x2_t __a)
15231 return vget_lane_u32 ((uint32x2_t)
15232 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
/* vaddvq_s8: sum of 16 signed bytes.  */
15236 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15237 vaddvq_s8 (int8x16_t __a)
15239 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
/* vaddvq_s16: sum of 8 signed halfwords.  */
15243 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15244 vaddvq_s16 (int16x8_t __a)
15246 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
/* vaddvq_s32: sum of 4 signed words.  */
15249 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15250 vaddvq_s32 (int32x4_t __a)
15252 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
/* vaddvq_s64: sum of 2 signed doublewords.  */
15255 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15256 vaddvq_s64 (int64x2_t __a)
15258 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
/* vaddvq_u8: sum of 16 unsigned bytes.  */
15261 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15262 vaddvq_u8 (uint8x16_t __a)
15264 return vgetq_lane_u8 ((uint8x16_t)
15265 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
/* vaddvq_u16: sum of 8 unsigned halfwords.  */
15269 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15270 vaddvq_u16 (uint16x8_t __a)
15272 return vgetq_lane_u16 ((uint16x8_t)
15273 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
/* vaddvq_u32: sum of 4 unsigned words.  */
15277 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15278 vaddvq_u32 (uint32x4_t __a)
15280 return vgetq_lane_u32 ((uint32x4_t)
15281 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
/* vaddvq_u64: sum of 2 unsigned doublewords.  */
15285 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15286 vaddvq_u64 (uint64x2_t __a)
15288 return vgetq_lane_u64 ((uint64x2_t)
15289 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
/* vaddv_f32: pairwise sum of 2 floats, extracted from lane 0.  */
15293 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15294 vaddv_f32 (float32x2_t __a)
15296 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
15297 return vget_lane_f32 (__t, 0);
/* vaddvq_f32: sum of 4 floats.  */
15300 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15301 vaddvq_f32 (float32x4_t __a)
15303 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
15304 return vgetq_lane_f32 (__t, 0);
/* vaddvq_f64: sum of 2 doubles.  */
15307 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15308 vaddvq_f64 (float64x2_t __a)
15310 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
15311 return vgetq_lane_f64 (__t, 0);
15314 /* vbsl */
/* vbsl family: bitwise select — for each bit, take __b where the mask
   __a has a 1 bit and __c where it has a 0 bit.  Each variant forwards
   to a type-specific simd_bsl builtin; the _suss/_pupp/_uuuu suffixes
   encode the operand signednesses expected by the builtin.  */
15316 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15317 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
15319 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
/* vbsl_p8: 8 x poly8 select.  */
15322 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15323 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
15325 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
/* vbsl_p16: 4 x poly16 select.  */
15328 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15329 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
15331 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
/* vbsl_s8: 8 x int8 select.  */
15334 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15335 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
15337 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
/* vbsl_s16: 4 x int16 select.  */
15340 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15341 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
15343 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
/* vbsl_s32: 2 x int32 select.  */
15346 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15347 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
15349 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
/* vbsl_s64: scalar 64-bit select (DI mode).  */
15352 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15353 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
15355 return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c);
/* vbsl_u8: 8 x uint8 select.  */
15358 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15359 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
15361 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
/* vbsl_u16: 4 x uint16 select.  */
15364 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15365 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
15367 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
/* vbsl_u32: 2 x uint32 select.  */
15370 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15371 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
15373 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
/* vbsl_u64: scalar 64-bit unsigned select.  */
15376 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15377 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
15379 return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c);
/* vbslq_f32: 4 x float select.  */
15382 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15383 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
15385 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
/* vbslq_f64: 2 x double select.  */
15388 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15389 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
15391 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
/* vbslq_p8: 16 x poly8 select.  */
15394 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15395 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
15397 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
/* vbslq_p16: 8 x poly16 select.  */
15400 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15401 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
15403 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
/* vbslq_s8: 16 x int8 select.  */
15406 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15407 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
15409 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
/* vbslq_s16: 8 x int16 select.  */
15412 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15413 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
15415 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
/* vbslq_s32: 4 x int32 select.  */
15418 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15419 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
15421 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
/* vbslq_s64: 2 x int64 select.  */
15424 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15425 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
15427 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
/* vbslq_u8: 16 x uint8 select.  */
15430 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15431 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
15433 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
/* vbslq_u16: 8 x uint16 select.  */
15436 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15437 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
15439 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
/* vbslq_u32: 4 x uint32 select.  */
15442 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15443 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
15445 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
/* vbslq_u64: 2 x uint64 select.  */
15448 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15449 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
15451 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
15454 #ifdef __ARM_FEATURE_CRYPTO
15456 /* vaes */
/* vaeseq_u8: one AES single-round encryption step (AddRoundKey +
   SubBytes + ShiftRows) on a 128-bit block.  Only available when
   __ARM_FEATURE_CRYPTO is defined (see enclosing #ifdef).  */
15458 static __inline uint8x16_t
15459 vaeseq_u8 (uint8x16_t data, uint8x16_t key)
15461 return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
/* vaesdq_u8: one AES single-round decryption step.  */
15464 static __inline uint8x16_t
15465 vaesdq_u8 (uint8x16_t data, uint8x16_t key)
15467 return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
/* vaesmcq_u8: AES MixColumns transformation.  */
15470 static __inline uint8x16_t
15471 vaesmcq_u8 (uint8x16_t data)
15473 return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
/* vaesimcq_u8: AES inverse MixColumns transformation.  */
15476 static __inline uint8x16_t
15477 vaesimcq_u8 (uint8x16_t data)
15479 return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
15482 #endif
15484 /* vcage */
/* vcage family: absolute compare greater-than-or-equal.  Scalar forms
   return an all-ones (-1 converted to unsigned) or all-zeros mask;
   vector forms rely on vector >= producing per-lane all-ones/all-zeros
   masks.  */
15486 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15487 vcages_f32 (float32_t __a, float32_t __b)
15489 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
/* vcage_f32: 2-lane |a| >= |b| mask.  */
15492 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15493 vcage_f32 (float32x2_t __a, float32x2_t __b)
15495 return vabs_f32 (__a) >= vabs_f32 (__b);
/* vcageq_f32: 4-lane |a| >= |b| mask.  */
15498 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15499 vcageq_f32 (float32x4_t __a, float32x4_t __b)
15501 return vabsq_f32 (__a) >= vabsq_f32 (__b);
/* vcaged_f64: scalar double |a| >= |b| mask.  */
15504 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15505 vcaged_f64 (float64_t __a, float64_t __b)
15507 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
/* vcageq_f64: 2-lane double |a| >= |b| mask.  */
15510 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15511 vcageq_f64 (float64x2_t __a, float64x2_t __b)
15513 return vabsq_f64 (__a) >= vabsq_f64 (__b);
15516 /* vcagt */
/* vcagt family: absolute compare strictly greater-than; same masking
   convention as vcage above.  */
15518 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15519 vcagts_f32 (float32_t __a, float32_t __b)
15521 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
/* vcagt_f32: 2-lane |a| > |b| mask.  */
15524 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15525 vcagt_f32 (float32x2_t __a, float32x2_t __b)
15527 return vabs_f32 (__a) > vabs_f32 (__b);
/* vcagtq_f32: 4-lane |a| > |b| mask.  */
15530 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15531 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
15533 return vabsq_f32 (__a) > vabsq_f32 (__b);
/* vcagtd_f64: scalar double |a| > |b| mask.  */
15536 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15537 vcagtd_f64 (float64_t __a, float64_t __b)
15539 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
/* vcagtq_f64: 2-lane double |a| > |b| mask.  */
15542 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15543 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
15545 return vabsq_f64 (__a) > vabsq_f64 (__b);
15548 /* vcale */
15550 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15551 vcale_f32 (float32x2_t __a, float32x2_t __b)
15553 return vabs_f32 (__a) <= vabs_f32 (__b);
15556 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15557 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
15559 return vabsq_f32 (__a) <= vabsq_f32 (__b);
15562 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15563 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
15565 return vabsq_f64 (__a) <= vabsq_f64 (__b);
15568 /* vcalt */
15570 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15571 vcalt_f32 (float32x2_t __a, float32x2_t __b)
15573 return vabs_f32 (__a) < vabs_f32 (__b);
15576 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15577 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
15579 return vabsq_f32 (__a) < vabsq_f32 (__b);
15582 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15583 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
15585 return vabsq_f64 (__a) < vabsq_f64 (__b);
15588 /* vceq - vector. */
15590 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15591 vceq_f32 (float32x2_t __a, float32x2_t __b)
15593 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15596 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15597 vceq_f64 (float64x1_t __a, float64x1_t __b)
15599 return __a == __b ? -1ll : 0ll;
15602 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15603 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
15605 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15606 (int8x8_t) __b);
15609 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15610 vceq_s8 (int8x8_t __a, int8x8_t __b)
15612 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15615 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15616 vceq_s16 (int16x4_t __a, int16x4_t __b)
15618 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15621 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15622 vceq_s32 (int32x2_t __a, int32x2_t __b)
15624 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15627 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15628 vceq_s64 (int64x1_t __a, int64x1_t __b)
15630 return __a == __b ? -1ll : 0ll;
15633 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15634 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
15636 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15637 (int8x8_t) __b);
15640 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15641 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
15643 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15644 (int16x4_t) __b);
15647 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15648 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
15650 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15651 (int32x2_t) __b);
15654 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15655 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
15657 return __a == __b ? -1ll : 0ll;
15660 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15661 vceqq_f32 (float32x4_t __a, float32x4_t __b)
15663 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15666 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15667 vceqq_f64 (float64x2_t __a, float64x2_t __b)
15669 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15672 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15673 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
15675 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15676 (int8x16_t) __b);
15679 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15680 vceqq_s8 (int8x16_t __a, int8x16_t __b)
15682 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15685 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15686 vceqq_s16 (int16x8_t __a, int16x8_t __b)
15688 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
15691 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15692 vceqq_s32 (int32x4_t __a, int32x4_t __b)
15694 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
15697 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15698 vceqq_s64 (int64x2_t __a, int64x2_t __b)
15700 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
15703 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15704 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
15706 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15707 (int8x16_t) __b);
15710 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15711 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
15713 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
15714 (int16x8_t) __b);
15717 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15718 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
15720 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
15721 (int32x4_t) __b);
15724 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15725 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
15727 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
15728 (int64x2_t) __b);
15731 /* vceq - scalar. */
15733 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15734 vceqs_f32 (float32_t __a, float32_t __b)
15736 return __a == __b ? -1 : 0;
15739 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15740 vceqd_s64 (int64x1_t __a, int64x1_t __b)
15742 return __a == __b ? -1ll : 0ll;
15745 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15746 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
15748 return __a == __b ? -1ll : 0ll;
15751 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15752 vceqd_f64 (float64_t __a, float64_t __b)
15754 return __a == __b ? -1ll : 0ll;
15757 /* vceqz - vector. */
15759 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15760 vceqz_f32 (float32x2_t __a)
15762 float32x2_t __b = {0.0f, 0.0f};
15763 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15766 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15767 vceqz_f64 (float64x1_t __a)
15769 return __a == 0.0 ? -1ll : 0ll;
15772 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15773 vceqz_p8 (poly8x8_t __a)
15775 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15776 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15777 (int8x8_t) __b);
15780 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15781 vceqz_s8 (int8x8_t __a)
15783 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15784 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15787 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15788 vceqz_s16 (int16x4_t __a)
15790 int16x4_t __b = {0, 0, 0, 0};
15791 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15794 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15795 vceqz_s32 (int32x2_t __a)
15797 int32x2_t __b = {0, 0};
15798 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15801 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15802 vceqz_s64 (int64x1_t __a)
15804 return __a == 0ll ? -1ll : 0ll;
15807 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15808 vceqz_u8 (uint8x8_t __a)
15810 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15811 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15812 (int8x8_t) __b);
15815 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15816 vceqz_u16 (uint16x4_t __a)
15818 uint16x4_t __b = {0, 0, 0, 0};
15819 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15820 (int16x4_t) __b);
15823 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15824 vceqz_u32 (uint32x2_t __a)
15826 uint32x2_t __b = {0, 0};
15827 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15828 (int32x2_t) __b);
15831 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15832 vceqz_u64 (uint64x1_t __a)
15834 return __a == 0ll ? -1ll : 0ll;
15837 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15838 vceqzq_f32 (float32x4_t __a)
15840 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15841 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15844 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15845 vceqzq_f64 (float64x2_t __a)
15847 float64x2_t __b = {0.0, 0.0};
15848 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15851 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15852 vceqzq_p8 (poly8x16_t __a)
15854 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15855 0, 0, 0, 0, 0, 0, 0, 0};
15856 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15857 (int8x16_t) __b);
15860 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15861 vceqzq_s8 (int8x16_t __a)
15863 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15864 0, 0, 0, 0, 0, 0, 0, 0};
15865 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15868 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15869 vceqzq_s16 (int16x8_t __a)
15871 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15872 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
15875 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15876 vceqzq_s32 (int32x4_t __a)
15878 int32x4_t __b = {0, 0, 0, 0};
15879 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
15882 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15883 vceqzq_s64 (int64x2_t __a)
15885 int64x2_t __b = {0, 0};
15886 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
15889 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15890 vceqzq_u8 (uint8x16_t __a)
15892 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15893 0, 0, 0, 0, 0, 0, 0, 0};
15894 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15895 (int8x16_t) __b);
15898 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15899 vceqzq_u16 (uint16x8_t __a)
15901 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15902 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
15903 (int16x8_t) __b);
15906 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15907 vceqzq_u32 (uint32x4_t __a)
15909 uint32x4_t __b = {0, 0, 0, 0};
15910 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
15911 (int32x4_t) __b);
15914 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15915 vceqzq_u64 (uint64x2_t __a)
15917 uint64x2_t __b = {0, 0};
15918 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
15919 (int64x2_t) __b);
15922 /* vceqz - scalar. */
15924 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15925 vceqzs_f32 (float32_t __a)
15927 return __a == 0.0f ? -1 : 0;
15930 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15931 vceqzd_s64 (int64x1_t __a)
15933 return __a == 0 ? -1ll : 0ll;
15936 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15937 vceqzd_u64 (int64x1_t __a)
15939 return __a == 0 ? -1ll : 0ll;
15942 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15943 vceqzd_f64 (float64_t __a)
15945 return __a == 0.0 ? -1ll : 0ll;
15948 /* vcge - vector. */
15950 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15951 vcge_f32 (float32x2_t __a, float32x2_t __b)
15953 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
15956 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15957 vcge_f64 (float64x1_t __a, float64x1_t __b)
15959 return __a >= __b ? -1ll : 0ll;
15962 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15963 vcge_p8 (poly8x8_t __a, poly8x8_t __b)
15965 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
15966 (int8x8_t) __b);
15969 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15970 vcge_s8 (int8x8_t __a, int8x8_t __b)
15972 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
15975 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15976 vcge_s16 (int16x4_t __a, int16x4_t __b)
15978 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
15981 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15982 vcge_s32 (int32x2_t __a, int32x2_t __b)
15984 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
15987 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15988 vcge_s64 (int64x1_t __a, int64x1_t __b)
15990 return __a >= __b ? -1ll : 0ll;
15993 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15994 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
15996 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
15997 (int8x8_t) __b);
16000 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16001 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
16003 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16004 (int16x4_t) __b);
16007 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16008 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
16010 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16011 (int32x2_t) __b);
16014 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16015 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
16017 return __a >= __b ? -1ll : 0ll;
16020 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16021 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
16023 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16026 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16027 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
16029 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16032 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16033 vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
16035 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16036 (int8x16_t) __b);
16039 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16040 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
16042 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16045 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16046 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
16048 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16051 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16052 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
16054 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16057 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16058 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
16060 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16063 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16064 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
16066 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16067 (int8x16_t) __b);
16070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16071 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
16073 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16074 (int16x8_t) __b);
16077 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16078 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
16080 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16081 (int32x4_t) __b);
16084 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16085 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
16087 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16088 (int64x2_t) __b);
16091 /* vcge - scalar. */
16093 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16094 vcges_f32 (float32_t __a, float32_t __b)
16096 return __a >= __b ? -1 : 0;
16099 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16100 vcged_s64 (int64x1_t __a, int64x1_t __b)
16102 return __a >= __b ? -1ll : 0ll;
16105 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16106 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
16108 return __a >= __b ? -1ll : 0ll;
16111 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16112 vcged_f64 (float64_t __a, float64_t __b)
16114 return __a >= __b ? -1ll : 0ll;
16117 /* vcgez - vector. */
16119 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16120 vcgez_f32 (float32x2_t __a)
16122 float32x2_t __b = {0.0f, 0.0f};
16123 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16126 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16127 vcgez_f64 (float64x1_t __a)
16129 return __a >= 0.0 ? -1ll : 0ll;
16132 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16133 vcgez_p8 (poly8x8_t __a)
16135 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16136 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16137 (int8x8_t) __b);
16140 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16141 vcgez_s8 (int8x8_t __a)
16143 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16144 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16147 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16148 vcgez_s16 (int16x4_t __a)
16150 int16x4_t __b = {0, 0, 0, 0};
16151 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16154 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16155 vcgez_s32 (int32x2_t __a)
16157 int32x2_t __b = {0, 0};
16158 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16161 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16162 vcgez_s64 (int64x1_t __a)
16164 return __a >= 0ll ? -1ll : 0ll;
16167 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16168 vcgez_u8 (uint8x8_t __a)
16170 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16171 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16172 (int8x8_t) __b);
16175 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16176 vcgez_u16 (uint16x4_t __a)
16178 uint16x4_t __b = {0, 0, 0, 0};
16179 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16180 (int16x4_t) __b);
16183 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16184 vcgez_u32 (uint32x2_t __a)
16186 uint32x2_t __b = {0, 0};
16187 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16188 (int32x2_t) __b);
16191 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16192 vcgez_u64 (uint64x1_t __a)
16194 return __a >= 0ll ? -1ll : 0ll;
16197 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16198 vcgezq_f32 (float32x4_t __a)
16200 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16201 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16204 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16205 vcgezq_f64 (float64x2_t __a)
16207 float64x2_t __b = {0.0, 0.0};
16208 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16211 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16212 vcgezq_p8 (poly8x16_t __a)
16214 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16215 0, 0, 0, 0, 0, 0, 0, 0};
16216 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16217 (int8x16_t) __b);
16220 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16221 vcgezq_s8 (int8x16_t __a)
16223 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16224 0, 0, 0, 0, 0, 0, 0, 0};
16225 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16228 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16229 vcgezq_s16 (int16x8_t __a)
16231 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16232 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16235 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16236 vcgezq_s32 (int32x4_t __a)
16238 int32x4_t __b = {0, 0, 0, 0};
16239 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16242 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16243 vcgezq_s64 (int64x2_t __a)
16245 int64x2_t __b = {0, 0};
16246 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16249 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16250 vcgezq_u8 (uint8x16_t __a)
16252 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16253 0, 0, 0, 0, 0, 0, 0, 0};
16254 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16255 (int8x16_t) __b);
16258 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16259 vcgezq_u16 (uint16x8_t __a)
16261 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16262 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16263 (int16x8_t) __b);
16266 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16267 vcgezq_u32 (uint32x4_t __a)
16269 uint32x4_t __b = {0, 0, 0, 0};
16270 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16271 (int32x4_t) __b);
16274 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16275 vcgezq_u64 (uint64x2_t __a)
16277 uint64x2_t __b = {0, 0};
16278 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16279 (int64x2_t) __b);
16282 /* vcgez - scalar. */
16284 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16285 vcgezs_f32 (float32_t __a)
16287 return __a >= 0.0f ? -1 : 0;
16290 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16291 vcgezd_s64 (int64x1_t __a)
16293 return __a >= 0 ? -1ll : 0ll;
16296 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16297 vcgezd_u64 (int64x1_t __a)
16299 return __a >= 0 ? -1ll : 0ll;
16302 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16303 vcgezd_f64 (float64_t __a)
16305 return __a >= 0.0 ? -1ll : 0ll;
16308 /* vcgt - vector. */
16310 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16311 vcgt_f32 (float32x2_t __a, float32x2_t __b)
16313 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16316 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16317 vcgt_f64 (float64x1_t __a, float64x1_t __b)
16319 return __a > __b ? -1ll : 0ll;
16322 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16323 vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
16325 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16326 (int8x8_t) __b);
16329 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16330 vcgt_s8 (int8x8_t __a, int8x8_t __b)
16332 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16335 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16336 vcgt_s16 (int16x4_t __a, int16x4_t __b)
16338 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16341 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16342 vcgt_s32 (int32x2_t __a, int32x2_t __b)
16344 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16347 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16348 vcgt_s64 (int64x1_t __a, int64x1_t __b)
16350 return __a > __b ? -1ll : 0ll;
16353 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16354 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
16356 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16357 (int8x8_t) __b);
16360 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16361 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
16363 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16364 (int16x4_t) __b);
16367 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16368 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
16370 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16371 (int32x2_t) __b);
16374 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16375 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
16377 return __a > __b ? -1ll : 0ll;
16380 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16381 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
16383 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16386 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16387 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
16389 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16392 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16393 vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
16395 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16396 (int8x16_t) __b);
16399 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16400 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
16402 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16405 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16406 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
16408 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16411 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16412 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
16414 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16417 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16418 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
16420 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16423 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16424 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
16426 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16427 (int8x16_t) __b);
16430 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16431 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
16433 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16434 (int16x8_t) __b);
16437 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16438 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
16440 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16441 (int32x4_t) __b);
16444 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16445 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
16447 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16448 (int64x2_t) __b);
16451 /* vcgt - scalar. */
16453 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16454 vcgts_f32 (float32_t __a, float32_t __b)
16456 return __a > __b ? -1 : 0;
16459 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16460 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
16462 return __a > __b ? -1ll : 0ll;
16465 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16466 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
16468 return __a > __b ? -1ll : 0ll;
16471 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16472 vcgtd_f64 (float64_t __a, float64_t __b)
16474 return __a > __b ? -1ll : 0ll;
16477 /* vcgtz - vector. */
16479 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16480 vcgtz_f32 (float32x2_t __a)
16482 float32x2_t __b = {0.0f, 0.0f};
16483 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16486 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16487 vcgtz_f64 (float64x1_t __a)
16489 return __a > 0.0 ? -1ll : 0ll;
16492 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16493 vcgtz_p8 (poly8x8_t __a)
16495 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16496 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16497 (int8x8_t) __b);
16500 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16501 vcgtz_s8 (int8x8_t __a)
16503 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16504 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16507 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16508 vcgtz_s16 (int16x4_t __a)
16510 int16x4_t __b = {0, 0, 0, 0};
16511 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16514 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16515 vcgtz_s32 (int32x2_t __a)
16517 int32x2_t __b = {0, 0};
16518 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16521 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16522 vcgtz_s64 (int64x1_t __a)
16524 return __a > 0ll ? -1ll : 0ll;
16527 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16528 vcgtz_u8 (uint8x8_t __a)
16530 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16531 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16532 (int8x8_t) __b);
16535 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16536 vcgtz_u16 (uint16x4_t __a)
16538 uint16x4_t __b = {0, 0, 0, 0};
16539 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16540 (int16x4_t) __b);
/* vcgtz - vector (continued).  Lane-wise "greater than zero": each
   result lane is all ones when the corresponding input lane is > 0,
   all zeros otherwise.  The zero operand is materialised as a constant
   vector; the casts on the unsigned and poly variants only adapt the
   arguments to the builtins' signed prototypes.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

/* Scalar-width lane: for an unsigned value, > 0 is simply != 0.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_u64 (uint64x1_t __a)
{
  return __a > 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
}

/* NOTE(review): the poly variant reuses the signed byte compare;
   presumably intentional since only the zero test matters here.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
}

/* Unsigned q-forms: unsigned-compare builtins, so > 0 acts as != 0.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_u8 (uint8x16_t __a)
{
  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_u16 (uint16x8_t __a)
{
  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_u32 (uint32x4_t __a)
{
  uint32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_u64 (uint64x2_t __a)
{
  uint64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}
16642 /* vcgtz - scalar. */
16644 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16645 vcgtzs_f32 (float32_t __a)
16647 return __a > 0.0f ? -1 : 0;
16650 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16651 vcgtzd_s64 (int64x1_t __a)
16653 return __a > 0 ? -1ll : 0ll;
16656 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16657 vcgtzd_u64 (int64x1_t __a)
16659 return __a > 0 ? -1ll : 0ll;
16662 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16663 vcgtzd_f64 (float64_t __a)
16665 return __a > 0.0 ? -1ll : 0ll;
/* vcle - vector.  Lane-wise a <= b, implemented as b >= a by calling
   the cmge/cmgeu builtins with the operands swapped.  Each result lane
   is all ones on success, all zeros otherwise; casts on the unsigned
   and poly variants only match the builtins' signed prototypes.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
}

/* Single-lane 64-bit forms are open-coded as scalar compares.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
						  (int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
						   (int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
						   (int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
						    (int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
						   (int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
						   (int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
						   (int64x2_t) __a);
}
16811 /* vcle - scalar. */
16813 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16814 vcles_f32 (float32_t __a, float32_t __b)
16816 return __a <= __b ? -1 : 0;
16819 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16820 vcled_s64 (int64x1_t __a, int64x1_t __b)
16822 return __a <= __b ? -1ll : 0ll;
16825 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16826 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
16828 return __a <= __b ? -1ll : 0ll;
16831 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16832 vcled_f64 (float64_t __a, float64_t __b)
16834 return __a <= __b ? -1ll : 0ll;
/* vclez - vector.  Lane-wise "less than or equal to zero" against a
   constant zero vector via the cmle builtins; each result lane is all
   ones on success, all zeros otherwise.  Casts on the poly variant
   only match the builtin's signed prototype.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_f64 (float64x1_t __a)
{
  return __a <= 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclez_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclez_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclez_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_s64 (int64x1_t __a)
{
  return __a <= 0ll ? -1ll : 0ll;
}

/* Unsigned <= 0 reduces to == 0; the unsigned comparison below
   evaluates exactly that.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_u64 (uint64x1_t __a)
{
  return __a <= 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclezq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
}
16945 /* vclez - scalar. */
16947 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16948 vclezs_f32 (float32_t __a)
16950 return __a <= 0.0f ? -1 : 0;
16953 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16954 vclezd_s64 (int64x1_t __a)
16956 return __a <= 0 ? -1ll : 0ll;
16959 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16960 vclezd_u64 (int64x1_t __a)
16962 return __a <= 0 ? -1ll : 0ll;
16965 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16966 vclezd_f64 (float64_t __a)
16968 return __a <= 0.0 ? -1ll : 0ll;
/* vclt - vector.  Lane-wise a < b, implemented as b > a by calling the
   cmgt/cmgtu builtins with the operands swapped.  Each result lane is
   all ones on success, all zeros otherwise; casts on the unsigned and
   poly variants only match the builtins' signed prototypes.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
}

/* Single-lane 64-bit forms are open-coded as scalar compares.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
						  (int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
						   (int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
						   (int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
						    (int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
						   (int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
						   (int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
						   (int64x2_t) __a);
}
17114 /* vclt - scalar. */
17116 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17117 vclts_f32 (float32_t __a, float32_t __b)
17119 return __a < __b ? -1 : 0;
17122 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17123 vcltd_s64 (int64x1_t __a, int64x1_t __b)
17125 return __a < __b ? -1ll : 0ll;
17128 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17129 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
17131 return __a < __b ? -1ll : 0ll;
17134 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17135 vcltd_f64 (float64_t __a, float64_t __b)
17137 return __a < __b ? -1ll : 0ll;
/* vcltz - vector.  Lane-wise "less than zero" against a constant zero
   vector via the cmlt builtins; each result lane is all ones on
   success, all zeros otherwise.  Only float, poly and signed variants
   exist - unsigned values can never be below zero.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_f64 (float64x1_t __a)
{
  return __a < 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcltz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_s64 (int64x1_t __a)
{
  return __a < 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
}
17242 /* vcltz - scalar. */
17244 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17245 vcltzs_f32 (float32_t __a)
17247 return __a < 0.0f ? -1 : 0;
17250 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17251 vcltzd_s64 (int64x1_t __a)
17253 return __a < 0 ? -1ll : 0ll;
17256 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17257 vcltzd_u64 (int64x1_t __a)
17259 return __a < 0 ? -1ll : 0ll;
17262 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17263 vcltzd_f64 (float64_t __a)
17265 return __a < 0.0 ? -1ll : 0ll;
/* vclz.  Lane-wise count of leading zero bits via the clz builtins.
   Unsigned variants reuse the signed builtins; the casts only adapt
   the vector element signedness and do not change lane bits.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clzv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clzv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clzv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t __a)
{
  return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t __a)
{
  return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t __a)
{
  return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clzv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clzv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clzv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t __a)
{
  return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t __a)
{
  return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t __a)
{
  return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
/* vcvt (double -> float).  Narrow each double lane to float.  The
   "high" form narrows __b into the upper half of the result, keeping
   __a as the lower half.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t __a)
{
  return __builtin_aarch64_float_truncate_lo_v2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
}

/* vcvt (float -> double).  Widen float lanes to double; the "high"
   form widens the upper two lanes of __a.  */

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t __a)
{
  return __builtin_aarch64_float_extend_lo_v2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a)
{
  return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
}
/* vcvt (<u>int -> float).  Scalar forms are plain C conversions;
   vector forms use the float/floatuns builtins, with casts on the
   unsigned variants only matching the builtins' signed prototypes.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_s64 (int64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_u64 (uint64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_s32 (int32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_u32 (uint32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t __a)
{
  return __builtin_aarch64_floatv2siv2sf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t __a)
{
  return __builtin_aarch64_floatv4siv4sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t __a)
{
  return __builtin_aarch64_floatv2div2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
}
/* vcvt (float -> <u>int).  Scalar forms are plain C conversions
   (truncation toward zero); vector forms use the lbtrunc builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtd_s64_f64 (float64_t __a)
{
  return (int64_t) __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtd_u64_f64 (float64_t __a)
{
  return (uint64_t) __a;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_s32_f32 (float32_t __a)
{
  return (int32_t) __a;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_u32_f32 (float32_t __a)
{
  return (uint32_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
}
/* vcvta.  Float-to-integer conversion via the lround builtins
   (round-to-nearest with ties away from zero, per the "a" suffix).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lrounddfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lroundudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtas_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtas_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lroundv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lroundv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lroundv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
}
/* vcvtm.  Float-to-integer conversion rounding toward minus infinity
   (floor), via the floor/lfloor builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64 (float64_t __a)
{
  return __builtin_llfloor (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfloorudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtms_s32_f32 (float32_t __a)
{
  return __builtin_ifloorf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtms_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfloorusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
}
/* vcvtn.  Float-to-integer conversion rounding to nearest with ties
   to even, via the lfrintn builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
}
17705 /* vcvtp */
17707 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17708 vcvtpd_s64_f64 (float64_t __a)
17710 return __builtin_llceil (__a);
17713 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17714 vcvtpd_u64_f64 (float64_t __a)
17716 return __builtin_aarch64_lceiludfdi (__a);
17719 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17720 vcvtps_s32_f32 (float32_t __a)
17722 return __builtin_iceilf (__a);
17725 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17726 vcvtps_u32_f32 (float32_t __a)
17728 return __builtin_aarch64_lceilusfsi (__a);
17731 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17732 vcvtp_s32_f32 (float32x2_t __a)
17734 return __builtin_aarch64_lceilv2sfv2si (__a);
17737 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17738 vcvtp_u32_f32 (float32x2_t __a)
17740 /* TODO: This cast should go away when builtins have
17741 their correct types. */
17742 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
17745 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17746 vcvtpq_s32_f32 (float32x4_t __a)
17748 return __builtin_aarch64_lceilv4sfv4si (__a);
17751 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17752 vcvtpq_u32_f32 (float32x4_t __a)
17754 /* TODO: This cast should go away when builtins have
17755 their correct types. */
17756 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
17759 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17760 vcvtpq_s64_f64 (float64x2_t __a)
17762 return __builtin_aarch64_lceilv2dfv2di (__a);
17765 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17766 vcvtpq_u64_f64 (float64x2_t __a)
17768 /* TODO: This cast should go away when builtins have
17769 their correct types. */
17770 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
17773 /* vdup_n */
17775 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17776 vdup_n_f32 (float32_t __a)
17778 return (float32x2_t) {__a, __a};
17781 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17782 vdup_n_f64 (float64_t __a)
17784 return __a;
17787 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17788 vdup_n_p8 (poly8_t __a)
17790 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17793 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17794 vdup_n_p16 (poly16_t __a)
17796 return (poly16x4_t) {__a, __a, __a, __a};
17799 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17800 vdup_n_s8 (int8_t __a)
17802 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17805 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17806 vdup_n_s16 (int16_t __a)
17808 return (int16x4_t) {__a, __a, __a, __a};
17811 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17812 vdup_n_s32 (int32_t __a)
17814 return (int32x2_t) {__a, __a};
17817 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17818 vdup_n_s64 (int64_t __a)
17820 return __a;
17823 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17824 vdup_n_u8 (uint8_t __a)
17826 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17829 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17830 vdup_n_u16 (uint16_t __a)
17832 return (uint16x4_t) {__a, __a, __a, __a};
17835 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17836 vdup_n_u32 (uint32_t __a)
17838 return (uint32x2_t) {__a, __a};
17841 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17842 vdup_n_u64 (uint64_t __a)
17844 return __a;
17847 /* vdupq_n */
17849 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17850 vdupq_n_f32 (float32_t __a)
17852 return (float32x4_t) {__a, __a, __a, __a};
17855 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17856 vdupq_n_f64 (float64_t __a)
17858 return (float64x2_t) {__a, __a};
17861 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17862 vdupq_n_p8 (uint32_t __a)
17864 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
17865 __a, __a, __a, __a, __a, __a, __a, __a};
17868 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17869 vdupq_n_p16 (uint32_t __a)
17871 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17874 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17875 vdupq_n_s8 (int32_t __a)
17877 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
17878 __a, __a, __a, __a, __a, __a, __a, __a};
17881 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17882 vdupq_n_s16 (int32_t __a)
17884 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17887 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17888 vdupq_n_s32 (int32_t __a)
17890 return (int32x4_t) {__a, __a, __a, __a};
17893 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17894 vdupq_n_s64 (int64_t __a)
17896 return (int64x2_t) {__a, __a};
17899 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17900 vdupq_n_u8 (uint32_t __a)
17902 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
17903 __a, __a, __a, __a, __a, __a, __a, __a};
17906 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17907 vdupq_n_u16 (uint32_t __a)
17909 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17913 vdupq_n_u32 (uint32_t __a)
17915 return (uint32x4_t) {__a, __a, __a, __a};
17918 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17919 vdupq_n_u64 (uint64_t __a)
17921 return (uint64x2_t) {__a, __a};
17924 /* vdup_lane */
17926 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17927 vdup_lane_f32 (float32x2_t __a, const int __b)
17929 return __aarch64_vdup_lane_f32 (__a, __b);
17932 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17933 vdup_lane_f64 (float64x1_t __a, const int __b)
17935 return __aarch64_vdup_lane_f64 (__a, __b);
17938 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17939 vdup_lane_p8 (poly8x8_t __a, const int __b)
17941 return __aarch64_vdup_lane_p8 (__a, __b);
17944 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17945 vdup_lane_p16 (poly16x4_t __a, const int __b)
17947 return __aarch64_vdup_lane_p16 (__a, __b);
17950 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17951 vdup_lane_s8 (int8x8_t __a, const int __b)
17953 return __aarch64_vdup_lane_s8 (__a, __b);
17956 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17957 vdup_lane_s16 (int16x4_t __a, const int __b)
17959 return __aarch64_vdup_lane_s16 (__a, __b);
17962 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17963 vdup_lane_s32 (int32x2_t __a, const int __b)
17965 return __aarch64_vdup_lane_s32 (__a, __b);
17968 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17969 vdup_lane_s64 (int64x1_t __a, const int __b)
17971 return __aarch64_vdup_lane_s64 (__a, __b);
17974 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17975 vdup_lane_u8 (uint8x8_t __a, const int __b)
17977 return __aarch64_vdup_lane_u8 (__a, __b);
17980 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17981 vdup_lane_u16 (uint16x4_t __a, const int __b)
17983 return __aarch64_vdup_lane_u16 (__a, __b);
17986 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17987 vdup_lane_u32 (uint32x2_t __a, const int __b)
17989 return __aarch64_vdup_lane_u32 (__a, __b);
17992 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17993 vdup_lane_u64 (uint64x1_t __a, const int __b)
17995 return __aarch64_vdup_lane_u64 (__a, __b);
17998 /* vdup_laneq */
18000 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18001 vdup_laneq_f32 (float32x4_t __a, const int __b)
18003 return __aarch64_vdup_laneq_f32 (__a, __b);
18006 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18007 vdup_laneq_f64 (float64x2_t __a, const int __b)
18009 return __aarch64_vdup_laneq_f64 (__a, __b);
18012 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18013 vdup_laneq_p8 (poly8x16_t __a, const int __b)
18015 return __aarch64_vdup_laneq_p8 (__a, __b);
18018 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18019 vdup_laneq_p16 (poly16x8_t __a, const int __b)
18021 return __aarch64_vdup_laneq_p16 (__a, __b);
18024 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18025 vdup_laneq_s8 (int8x16_t __a, const int __b)
18027 return __aarch64_vdup_laneq_s8 (__a, __b);
18030 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18031 vdup_laneq_s16 (int16x8_t __a, const int __b)
18033 return __aarch64_vdup_laneq_s16 (__a, __b);
18036 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18037 vdup_laneq_s32 (int32x4_t __a, const int __b)
18039 return __aarch64_vdup_laneq_s32 (__a, __b);
18042 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18043 vdup_laneq_s64 (int64x2_t __a, const int __b)
18045 return __aarch64_vdup_laneq_s64 (__a, __b);
18048 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18049 vdup_laneq_u8 (uint8x16_t __a, const int __b)
18051 return __aarch64_vdup_laneq_u8 (__a, __b);
18054 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18055 vdup_laneq_u16 (uint16x8_t __a, const int __b)
18057 return __aarch64_vdup_laneq_u16 (__a, __b);
18060 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18061 vdup_laneq_u32 (uint32x4_t __a, const int __b)
18063 return __aarch64_vdup_laneq_u32 (__a, __b);
18066 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18067 vdup_laneq_u64 (uint64x2_t __a, const int __b)
18069 return __aarch64_vdup_laneq_u64 (__a, __b);
18072 /* vdupq_lane */
18073 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18074 vdupq_lane_f32 (float32x2_t __a, const int __b)
18076 return __aarch64_vdupq_lane_f32 (__a, __b);
18079 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18080 vdupq_lane_f64 (float64x1_t __a, const int __b)
18082 return __aarch64_vdupq_lane_f64 (__a, __b);
18085 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18086 vdupq_lane_p8 (poly8x8_t __a, const int __b)
18088 return __aarch64_vdupq_lane_p8 (__a, __b);
18091 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18092 vdupq_lane_p16 (poly16x4_t __a, const int __b)
18094 return __aarch64_vdupq_lane_p16 (__a, __b);
18097 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18098 vdupq_lane_s8 (int8x8_t __a, const int __b)
18100 return __aarch64_vdupq_lane_s8 (__a, __b);
18103 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18104 vdupq_lane_s16 (int16x4_t __a, const int __b)
18106 return __aarch64_vdupq_lane_s16 (__a, __b);
18109 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18110 vdupq_lane_s32 (int32x2_t __a, const int __b)
18112 return __aarch64_vdupq_lane_s32 (__a, __b);
18115 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18116 vdupq_lane_s64 (int64x1_t __a, const int __b)
18118 return __aarch64_vdupq_lane_s64 (__a, __b);
18121 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18122 vdupq_lane_u8 (uint8x8_t __a, const int __b)
18124 return __aarch64_vdupq_lane_u8 (__a, __b);
18127 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18128 vdupq_lane_u16 (uint16x4_t __a, const int __b)
18130 return __aarch64_vdupq_lane_u16 (__a, __b);
18133 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18134 vdupq_lane_u32 (uint32x2_t __a, const int __b)
18136 return __aarch64_vdupq_lane_u32 (__a, __b);
18139 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18140 vdupq_lane_u64 (uint64x1_t __a, const int __b)
18142 return __aarch64_vdupq_lane_u64 (__a, __b);
18145 /* vdupq_laneq */
18146 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18147 vdupq_laneq_f32 (float32x4_t __a, const int __b)
18149 return __aarch64_vdupq_laneq_f32 (__a, __b);
18152 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18153 vdupq_laneq_f64 (float64x2_t __a, const int __b)
18155 return __aarch64_vdupq_laneq_f64 (__a, __b);
18158 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18159 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
18161 return __aarch64_vdupq_laneq_p8 (__a, __b);
18164 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18165 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
18167 return __aarch64_vdupq_laneq_p16 (__a, __b);
18170 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18171 vdupq_laneq_s8 (int8x16_t __a, const int __b)
18173 return __aarch64_vdupq_laneq_s8 (__a, __b);
18176 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18177 vdupq_laneq_s16 (int16x8_t __a, const int __b)
18179 return __aarch64_vdupq_laneq_s16 (__a, __b);
18182 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18183 vdupq_laneq_s32 (int32x4_t __a, const int __b)
18185 return __aarch64_vdupq_laneq_s32 (__a, __b);
18188 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18189 vdupq_laneq_s64 (int64x2_t __a, const int __b)
18191 return __aarch64_vdupq_laneq_s64 (__a, __b);
18194 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18195 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
18197 return __aarch64_vdupq_laneq_u8 (__a, __b);
18200 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18201 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
18203 return __aarch64_vdupq_laneq_u16 (__a, __b);
18206 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18207 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
18209 return __aarch64_vdupq_laneq_u32 (__a, __b);
18212 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18213 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
18215 return __aarch64_vdupq_laneq_u64 (__a, __b);
18218 /* vdupb_lane */
18219 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18220 vdupb_lane_p8 (poly8x8_t __a, const int __b)
18222 return __aarch64_vget_lane_p8 (__a, __b);
18225 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18226 vdupb_lane_s8 (int8x8_t __a, const int __b)
18228 return __aarch64_vget_lane_s8 (__a, __b);
18231 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18232 vdupb_lane_u8 (uint8x8_t __a, const int __b)
18234 return __aarch64_vget_lane_u8 (__a, __b);
18237 /* vduph_lane */
18238 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18239 vduph_lane_p16 (poly16x4_t __a, const int __b)
18241 return __aarch64_vget_lane_p16 (__a, __b);
18244 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18245 vduph_lane_s16 (int16x4_t __a, const int __b)
18247 return __aarch64_vget_lane_s16 (__a, __b);
18250 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18251 vduph_lane_u16 (uint16x4_t __a, const int __b)
18253 return __aarch64_vget_lane_u16 (__a, __b);
18256 /* vdups_lane */
18257 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18258 vdups_lane_f32 (float32x2_t __a, const int __b)
18260 return __aarch64_vget_lane_f32 (__a, __b);
18263 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18264 vdups_lane_s32 (int32x2_t __a, const int __b)
18266 return __aarch64_vget_lane_s32 (__a, __b);
18269 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18270 vdups_lane_u32 (uint32x2_t __a, const int __b)
18272 return __aarch64_vget_lane_u32 (__a, __b);
18275 /* vdupd_lane */
18276 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18277 vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
18279 return __a;
18282 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18283 vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
18285 return __a;
18288 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18289 vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
18291 return __a;
18294 /* vdupb_laneq */
18295 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18296 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
18298 return __aarch64_vgetq_lane_p8 (__a, __b);
18301 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18302 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
18304 return __aarch64_vgetq_lane_s8 (__a, __b);
18307 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18308 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
18310 return __aarch64_vgetq_lane_u8 (__a, __b);
18313 /* vduph_laneq */
18314 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18315 vduph_laneq_p16 (poly16x8_t __a, const int __b)
18317 return __aarch64_vgetq_lane_p16 (__a, __b);
18320 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18321 vduph_laneq_s16 (int16x8_t __a, const int __b)
18323 return __aarch64_vgetq_lane_s16 (__a, __b);
18326 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18327 vduph_laneq_u16 (uint16x8_t __a, const int __b)
18329 return __aarch64_vgetq_lane_u16 (__a, __b);
18332 /* vdups_laneq */
18333 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18334 vdups_laneq_f32 (float32x4_t __a, const int __b)
18336 return __aarch64_vgetq_lane_f32 (__a, __b);
18339 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18340 vdups_laneq_s32 (int32x4_t __a, const int __b)
18342 return __aarch64_vgetq_lane_s32 (__a, __b);
18345 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18346 vdups_laneq_u32 (uint32x4_t __a, const int __b)
18348 return __aarch64_vgetq_lane_u32 (__a, __b);
18351 /* vdupd_laneq */
18352 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18353 vdupd_laneq_f64 (float64x2_t __a, const int __b)
18355 return __aarch64_vgetq_lane_f64 (__a, __b);
18358 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18359 vdupd_laneq_s64 (int64x2_t __a, const int __b)
18361 return __aarch64_vgetq_lane_s64 (__a, __b);
18364 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18365 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
18367 return __aarch64_vgetq_lane_u64 (__a, __b);
18370 /* vfma_lane */
18372 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18373 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
18374 float32x2_t __c, const int __lane)
18376 return __builtin_aarch64_fmav2sf (__b,
18377 __aarch64_vdup_lane_f32 (__c, __lane),
18378 __a);
18381 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18382 vfma_lane_f64 (float64_t __a, float64_t __b,
18383 float64_t __c, const int __lane)
18385 return __builtin_fma (__b, __c, __a);
18388 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18389 vfmad_lane_f64 (float64_t __a, float64_t __b,
18390 float64_t __c, const int __lane)
18392 return __builtin_fma (__b, __c, __a);
18395 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18396 vfmas_lane_f32 (float32_t __a, float32_t __b,
18397 float32x2_t __c, const int __lane)
18399 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18402 /* vfma_laneq */
18404 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18405 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
18406 float32x4_t __c, const int __lane)
18408 return __builtin_aarch64_fmav2sf (__b,
18409 __aarch64_vdup_laneq_f32 (__c, __lane),
18410 __a);
18413 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18414 vfma_laneq_f64 (float64_t __a, float64_t __b,
18415 float64x2_t __c, const int __lane)
18417 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18420 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18421 vfmad_laneq_f64 (float64_t __a, float64_t __b,
18422 float64x2_t __c, const int __lane)
18424 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18427 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18428 vfmas_laneq_f32 (float32_t __a, float32_t __b,
18429 float32x4_t __c, const int __lane)
18431 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18434 /* vfmaq_lane */
18436 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18437 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18438 float32x2_t __c, const int __lane)
18440 return __builtin_aarch64_fmav4sf (__b,
18441 __aarch64_vdupq_lane_f32 (__c, __lane),
18442 __a);
18445 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18446 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
18447 float64_t __c, const int __lane)
18449 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
18452 /* vfmaq_laneq */
18454 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18455 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18456 float32x4_t __c, const int __lane)
18458 return __builtin_aarch64_fmav4sf (__b,
18459 __aarch64_vdupq_laneq_f32 (__c, __lane),
18460 __a);
18463 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18464 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18465 float64x2_t __c, const int __lane)
18467 return __builtin_aarch64_fmav2df (__b,
18468 __aarch64_vdupq_laneq_f64 (__c, __lane),
18469 __a);
18472 /* vfms_lane */
18474 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18475 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
18476 float32x2_t __c, const int __lane)
18478 return __builtin_aarch64_fmav2sf (-__b,
18479 __aarch64_vdup_lane_f32 (__c, __lane),
18480 __a);
18483 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18484 vfms_lane_f64 (float64_t __a, float64_t __b,
18485 float64_t __c, const int __lane)
18487 return __builtin_fma (-__b, __c, __a);
18490 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18491 vfmsd_lane_f64 (float64_t __a, float64_t __b,
18492 float64_t __c, const int __lane)
18494 return __builtin_fma (-__b, __c, __a);
18497 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18498 vfmss_lane_f32 (float32_t __a, float32_t __b,
18499 float32x2_t __c, const int __lane)
18501 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18504 /* vfms_laneq */
18506 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18507 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
18508 float32x4_t __c, const int __lane)
18510 return __builtin_aarch64_fmav2sf (-__b,
18511 __aarch64_vdup_laneq_f32 (__c, __lane),
18512 __a);
18515 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18516 vfms_laneq_f64 (float64_t __a, float64_t __b,
18517 float64x2_t __c, const int __lane)
18519 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18522 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18523 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
18524 float64x2_t __c, const int __lane)
18526 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18529 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18530 vfmss_laneq_f32 (float32_t __a, float32_t __b,
18531 float32x4_t __c, const int __lane)
18533 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18536 /* vfmsq_lane */
18538 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18539 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18540 float32x2_t __c, const int __lane)
18542 return __builtin_aarch64_fmav4sf (-__b,
18543 __aarch64_vdupq_lane_f32 (__c, __lane),
18544 __a);
18547 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18548 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
18549 float64_t __c, const int __lane)
18551 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
18554 /* vfmsq_laneq */
18556 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18557 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18558 float32x4_t __c, const int __lane)
18560 return __builtin_aarch64_fmav4sf (-__b,
18561 __aarch64_vdupq_laneq_f32 (__c, __lane),
18562 __a);
18565 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18566 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18567 float64x2_t __c, const int __lane)
18569 return __builtin_aarch64_fmav2df (-__b,
18570 __aarch64_vdupq_laneq_f64 (__c, __lane),
18571 __a);
18574 /* vld1 */
18576 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18577 vld1_f32 (const float32_t *a)
18579 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
18582 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18583 vld1_f64 (const float64_t *a)
18585 return *a;
18588 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18589 vld1_p8 (const poly8_t *a)
18591 return (poly8x8_t)
18592 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18595 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18596 vld1_p16 (const poly16_t *a)
18598 return (poly16x4_t)
18599 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18602 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18603 vld1_s8 (const int8_t *a)
18605 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18608 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18609 vld1_s16 (const int16_t *a)
18611 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18614 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18615 vld1_s32 (const int32_t *a)
18617 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18620 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18621 vld1_s64 (const int64_t *a)
18623 return *a;
18626 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18627 vld1_u8 (const uint8_t *a)
18629 return (uint8x8_t)
18630 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18633 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18634 vld1_u16 (const uint16_t *a)
18636 return (uint16x4_t)
18637 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18640 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18641 vld1_u32 (const uint32_t *a)
18643 return (uint32x2_t)
18644 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18647 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18648 vld1_u64 (const uint64_t *a)
18650 return *a;
18653 /* vld1q */
18655 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18656 vld1q_f32 (const float32_t *a)
18658 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
18661 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18662 vld1q_f64 (const float64_t *a)
18664 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
18667 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18668 vld1q_p8 (const poly8_t *a)
18670 return (poly8x16_t)
18671 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18674 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18675 vld1q_p16 (const poly16_t *a)
18677 return (poly16x8_t)
18678 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18681 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18682 vld1q_s8 (const int8_t *a)
18684 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18687 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18688 vld1q_s16 (const int16_t *a)
18690 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18693 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18694 vld1q_s32 (const int32_t *a)
18696 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18699 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18700 vld1q_s64 (const int64_t *a)
18702 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
18705 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18706 vld1q_u8 (const uint8_t *a)
18708 return (uint8x16_t)
18709 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18712 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18713 vld1q_u16 (const uint16_t *a)
18715 return (uint16x8_t)
18716 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18719 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18720 vld1q_u32 (const uint32_t *a)
18722 return (uint32x4_t)
18723 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18726 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18727 vld1q_u64 (const uint64_t *a)
18729 return (uint64x2_t)
18730 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
/* vldn - structure loads of 2, 3 or 4 vectors (LD2/LD3/LD4).

   vld2 (64-bit D-register forms): the ld2 builtin returns an opaque
   two-register aggregate (__builtin_aarch64_simd_oi); each element of
   the result pair is pulled out with a get_dregoi* builtin and
   reinterpreted to the public vector type.  */

__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
  return ret;
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_p16 (const poly16_t * __a)
{
  poly16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vld2_s32 (const int32_t * __a)
{
  int32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vld2_u8 (const uint8_t * __a)
{
  uint8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vld2_u16 (const uint16_t * __a)
{
  uint16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vld2_u32 (const uint32_t * __a)
{
  uint32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
  float32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
  return ret;
}
/* vld2q (128-bit Q-register forms): same scheme as vld2, but the two
   full Q registers are extracted from the opaque OI aggregate with the
   get_qregoi* builtins.  */

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vld2q_s8 (const int8_t * __a)
{
  int8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vld2q_p8 (const poly8_t * __a)
{
  poly8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vld2q_s16 (const int16_t * __a)
{
  int16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vld2q_p16 (const poly16_t * __a)
{
  poly16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_s32 (const int32_t * __a)
{
  int32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
vld2q_s64 (const int64_t * __a)
{
  int64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vld2q_u8 (const uint8_t * __a)
{
  uint8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vld2q_u16 (const uint16_t * __a)
{
  uint16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vld2q_u32 (const uint32_t * __a)
{
  uint32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
vld2q_u64 (const uint64_t * __a)
{
  uint64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
  float32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
  return ret;
}

__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
vld2q_f64 (const float64_t * __a)
{
  float64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
  return ret;
}
/* vld3 (64-bit D-register forms): the ld3 builtin returns an opaque
   three-register aggregate (__builtin_aarch64_simd_ci); each of the
   three D registers is extracted with a get_dregci* builtin and
   reinterpreted to the public vector type.  */

__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_s64 (const int64_t * __a)
{
  int64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_u64 (const uint64_t * __a)
{
  uint64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
vld3_f64 (const float64_t * __a)
{
  float64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
  return ret;
}

__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
vld3_s8 (const int8_t * __a)
{
  int8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
vld3_p8 (const poly8_t * __a)
{
  poly8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
vld3_s16 (const int16_t * __a)
{
  int16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
vld3_p16 (const poly16_t * __a)
{
  poly16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
vld3_s32 (const int32_t * __a)
{
  int32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
vld3_u8 (const uint8_t * __a)
{
  uint8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
vld3_u16 (const uint16_t * __a)
{
  uint16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
vld3_u32 (const uint32_t * __a)
{
  uint32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
  float32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
  return ret;
}
/* vld3q (128-bit Q-register forms): same scheme as vld3, but the three
   full Q registers are extracted from the opaque CI aggregate with the
   get_qregci* builtins.  */

__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_s8 (const int8_t * __a)
{
  int8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
vld3q_p8 (const poly8_t * __a)
{
  poly8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
vld3q_s16 (const int16_t * __a)
{
  int16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
vld3q_p16 (const poly16_t * __a)
{
  poly16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
vld3q_s32 (const int32_t * __a)
{
  int32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
vld3q_s64 (const int64_t * __a)
{
  int64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
vld3q_u8 (const uint8_t * __a)
{
  uint8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
vld3q_u16 (const uint16_t * __a)
{
  uint16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
vld3q_u32 (const uint32_t * __a)
{
  uint32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
vld3q_u64 (const uint64_t * __a)
{
  uint64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
  float32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
  return ret;
}

__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
vld3q_f64 (const float64_t * __a)
{
  float64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
  return ret;
}
/* vld4 (64-bit D-register forms): the ld4 builtin returns an opaque
   four-register aggregate (__builtin_aarch64_simd_xi); each of the
   four D registers is extracted with a get_dregxi* builtin and
   reinterpreted to the public vector type.  */

__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64 (const int64_t * __a)
{
  int64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
vld4_u64 (const uint64_t * __a)
{
  uint64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
vld4_f64 (const float64_t * __a)
{
  float64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
  ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
  return ret;
}

__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
vld4_s8 (const int8_t * __a)
{
  int8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
vld4_p8 (const poly8_t * __a)
{
  poly8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_s16 (const int16_t * __a)
{
  int16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_p16 (const poly16_t * __a)
{
  poly16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_s32 (const int32_t * __a)
{
  int32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_u8 (const uint8_t * __a)
{
  uint8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
vld4_u16 (const uint16_t * __a)
{
  uint16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
vld4_u32 (const uint32_t * __a)
{
  uint32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
  float32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
  return ret;
}
/* vld4q (128-bit Q-register forms): same scheme as vld4, but the four
   full Q registers are extracted from the opaque XI aggregate with the
   get_qregxi* builtins.  */

__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_s8 (const int8_t * __a)
{
  int8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
vld4q_p8 (const poly8_t * __a)
{
  poly8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
vld4q_s16 (const int16_t * __a)
{
  int16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
vld4q_p16 (const poly16_t * __a)
{
  poly16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
vld4q_s32 (const int32_t * __a)
{
  int32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
vld4q_s64 (const int64_t * __a)
{
  int64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
vld4q_u8 (const uint8_t * __a)
{
  uint8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
vld4q_u16 (const uint16_t * __a)
{
  uint16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}
19547 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
19548 vld4q_u32 (const uint32_t * __a)
19550 uint32x4x4_t ret;
19551 __builtin_aarch64_simd_xi __o;
19552 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
19553 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
19554 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
19555 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
19556 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
19557 return ret;
19560 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
19561 vld4q_u64 (const uint64_t * __a)
19563 uint64x2x4_t ret;
19564 __builtin_aarch64_simd_xi __o;
19565 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
19566 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
19567 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
19568 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
19569 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
19570 return ret;
19573 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
19574 vld4q_f32 (const float32_t * __a)
19576 float32x4x4_t ret;
19577 __builtin_aarch64_simd_xi __o;
19578 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
19579 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
19580 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
19581 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
19582 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
19583 return ret;
19586 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
19587 vld4q_f64 (const float64_t * __a)
19589 float64x2x4_t ret;
19590 __builtin_aarch64_simd_xi __o;
19591 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
19592 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
19593 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
19594 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
19595 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
19596 return ret;
19599 /* vmax */
19601 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19602 vmax_f32 (float32x2_t __a, float32x2_t __b)
19604 return __builtin_aarch64_smax_nanv2sf (__a, __b);
19607 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19608 vmax_s8 (int8x8_t __a, int8x8_t __b)
19610 return __builtin_aarch64_smaxv8qi (__a, __b);
19613 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19614 vmax_s16 (int16x4_t __a, int16x4_t __b)
19616 return __builtin_aarch64_smaxv4hi (__a, __b);
19619 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19620 vmax_s32 (int32x2_t __a, int32x2_t __b)
19622 return __builtin_aarch64_smaxv2si (__a, __b);
19625 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19626 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
19628 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
19629 (int8x8_t) __b);
19632 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19633 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
19635 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
19636 (int16x4_t) __b);
19639 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19640 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
19642 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
19643 (int32x2_t) __b);
19646 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19647 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
19649 return __builtin_aarch64_smax_nanv4sf (__a, __b);
19652 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19653 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
19655 return __builtin_aarch64_smax_nanv2df (__a, __b);
19658 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19659 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
19661 return __builtin_aarch64_smaxv16qi (__a, __b);
19664 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19665 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
19667 return __builtin_aarch64_smaxv8hi (__a, __b);
19670 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19671 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
19673 return __builtin_aarch64_smaxv4si (__a, __b);
19676 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19677 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
19679 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
19680 (int8x16_t) __b);
19683 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19684 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
19686 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
19687 (int16x8_t) __b);
19690 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19691 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
19693 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
19694 (int32x4_t) __b);
19697 /* vmaxnm */
19699 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19700 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
19702 return __builtin_aarch64_smaxv2sf (__a, __b);
19705 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19706 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
19708 return __builtin_aarch64_smaxv4sf (__a, __b);
19711 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19712 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
19714 return __builtin_aarch64_smaxv2df (__a, __b);
19717 /* vmaxv */
19719 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19720 vmaxv_f32 (float32x2_t __a)
19722 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
19726 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19727 vmaxv_s8 (int8x8_t __a)
19729 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
19732 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19733 vmaxv_s16 (int16x4_t __a)
19735 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
19738 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19739 vmaxv_s32 (int32x2_t __a)
19741 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
19744 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19745 vmaxv_u8 (uint8x8_t __a)
19747 return vget_lane_u8 ((uint8x8_t)
19748 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
19752 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19753 vmaxv_u16 (uint16x4_t __a)
19755 return vget_lane_u16 ((uint16x4_t)
19756 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
19760 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19761 vmaxv_u32 (uint32x2_t __a)
19763 return vget_lane_u32 ((uint32x2_t)
19764 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
19768 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19769 vmaxvq_f32 (float32x4_t __a)
19771 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
19775 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19776 vmaxvq_f64 (float64x2_t __a)
19778 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
19782 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19783 vmaxvq_s8 (int8x16_t __a)
19785 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
19788 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19789 vmaxvq_s16 (int16x8_t __a)
19791 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
19794 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19795 vmaxvq_s32 (int32x4_t __a)
19797 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
19800 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19801 vmaxvq_u8 (uint8x16_t __a)
19803 return vgetq_lane_u8 ((uint8x16_t)
19804 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
19808 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19809 vmaxvq_u16 (uint16x8_t __a)
19811 return vgetq_lane_u16 ((uint16x8_t)
19812 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
19816 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19817 vmaxvq_u32 (uint32x4_t __a)
19819 return vgetq_lane_u32 ((uint32x4_t)
19820 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
19824 /* vmaxnmv */
19826 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19827 vmaxnmv_f32 (float32x2_t __a)
19829 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
19833 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19834 vmaxnmvq_f32 (float32x4_t __a)
19836 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
19839 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19840 vmaxnmvq_f64 (float64x2_t __a)
19842 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
19845 /* vmin */
19847 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19848 vmin_f32 (float32x2_t __a, float32x2_t __b)
19850 return __builtin_aarch64_smin_nanv2sf (__a, __b);
19853 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19854 vmin_s8 (int8x8_t __a, int8x8_t __b)
19856 return __builtin_aarch64_sminv8qi (__a, __b);
19859 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19860 vmin_s16 (int16x4_t __a, int16x4_t __b)
19862 return __builtin_aarch64_sminv4hi (__a, __b);
19865 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19866 vmin_s32 (int32x2_t __a, int32x2_t __b)
19868 return __builtin_aarch64_sminv2si (__a, __b);
19871 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19872 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
19874 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
19875 (int8x8_t) __b);
19878 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19879 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
19881 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
19882 (int16x4_t) __b);
19885 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19886 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
19888 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
19889 (int32x2_t) __b);
19892 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19893 vminq_f32 (float32x4_t __a, float32x4_t __b)
19895 return __builtin_aarch64_smin_nanv4sf (__a, __b);
19898 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19899 vminq_f64 (float64x2_t __a, float64x2_t __b)
19901 return __builtin_aarch64_smin_nanv2df (__a, __b);
19904 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19905 vminq_s8 (int8x16_t __a, int8x16_t __b)
19907 return __builtin_aarch64_sminv16qi (__a, __b);
19910 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19911 vminq_s16 (int16x8_t __a, int16x8_t __b)
19913 return __builtin_aarch64_sminv8hi (__a, __b);
19916 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19917 vminq_s32 (int32x4_t __a, int32x4_t __b)
19919 return __builtin_aarch64_sminv4si (__a, __b);
19922 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19923 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
19925 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
19926 (int8x16_t) __b);
19929 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19930 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
19932 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
19933 (int16x8_t) __b);
19936 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19937 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
19939 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
19940 (int32x4_t) __b);
19943 /* vminnm */
19945 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19946 vminnm_f32 (float32x2_t __a, float32x2_t __b)
19948 return __builtin_aarch64_sminv2sf (__a, __b);
19951 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19952 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
19954 return __builtin_aarch64_sminv4sf (__a, __b);
19957 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19958 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
19960 return __builtin_aarch64_sminv2df (__a, __b);
19963 /* vminv */
19965 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19966 vminv_f32 (float32x2_t __a)
19968 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
19972 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19973 vminv_s8 (int8x8_t __a)
19975 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
19979 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19980 vminv_s16 (int16x4_t __a)
19982 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
19985 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19986 vminv_s32 (int32x2_t __a)
19988 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
19991 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19992 vminv_u8 (uint8x8_t __a)
19994 return vget_lane_u8 ((uint8x8_t)
19995 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
19999 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20000 vminv_u16 (uint16x4_t __a)
20002 return vget_lane_u16 ((uint16x4_t)
20003 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
20007 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20008 vminv_u32 (uint32x2_t __a)
20010 return vget_lane_u32 ((uint32x2_t)
20011 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
20015 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20016 vminvq_f32 (float32x4_t __a)
20018 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
20022 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20023 vminvq_f64 (float64x2_t __a)
20025 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
20029 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20030 vminvq_s8 (int8x16_t __a)
20032 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
20035 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20036 vminvq_s16 (int16x8_t __a)
20038 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
20041 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20042 vminvq_s32 (int32x4_t __a)
20044 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
20047 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20048 vminvq_u8 (uint8x16_t __a)
20050 return vgetq_lane_u8 ((uint8x16_t)
20051 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
20055 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20056 vminvq_u16 (uint16x8_t __a)
20058 return vgetq_lane_u16 ((uint16x8_t)
20059 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
20063 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20064 vminvq_u32 (uint32x4_t __a)
20066 return vgetq_lane_u32 ((uint32x4_t)
20067 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
20071 /* vminnmv */
20073 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20074 vminnmv_f32 (float32x2_t __a)
20076 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
20079 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20080 vminnmvq_f32 (float32x4_t __a)
20082 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
20085 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20086 vminnmvq_f64 (float64x2_t __a)
20088 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
20091 /* vmla */
20093 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20094 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20096 return a + b * c;
20099 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20100 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20102 return a + b * c;
20105 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20106 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20108 return a + b * c;
20111 /* vmla_lane */
20113 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20114 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
20115 float32x2_t __c, const int __lane)
20117 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20120 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20121 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
20122 int16x4_t __c, const int __lane)
20124 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20127 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20128 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
20129 int32x2_t __c, const int __lane)
20131 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20134 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20135 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20136 uint16x4_t __c, const int __lane)
20138 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20141 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20142 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20143 uint32x2_t __c, const int __lane)
20145 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20148 /* vmla_laneq */
20150 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20151 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
20152 float32x4_t __c, const int __lane)
20154 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20157 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20158 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
20159 int16x8_t __c, const int __lane)
20161 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20164 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20165 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
20166 int32x4_t __c, const int __lane)
20168 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20171 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20172 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20173 uint16x8_t __c, const int __lane)
20175 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20178 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20179 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20180 uint32x4_t __c, const int __lane)
20182 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20185 /* vmlaq_lane */
20187 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20188 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
20189 float32x2_t __c, const int __lane)
20191 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20194 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20195 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
20196 int16x4_t __c, const int __lane)
20198 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20201 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20202 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
20203 int32x2_t __c, const int __lane)
20205 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20208 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20209 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20210 uint16x4_t __c, const int __lane)
20212 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20215 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20216 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20217 uint32x2_t __c, const int __lane)
20219 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20222 /* vmlaq_laneq */
20224 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20225 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20226 float32x4_t __c, const int __lane)
20228 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20231 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20232 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20233 int16x8_t __c, const int __lane)
20235 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20238 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20239 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20240 int32x4_t __c, const int __lane)
20242 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20245 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20246 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20247 uint16x8_t __c, const int __lane)
20249 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20252 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20253 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20254 uint32x4_t __c, const int __lane)
20256 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20259 /* vmls */
20261 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20262 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20264 return a - b * c;
20267 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20268 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20270 return a - b * c;
20273 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20274 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20276 return a - b * c;
20279 /* vmls_lane */
20281 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20282 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
20283 float32x2_t __c, const int __lane)
20285 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20288 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20289 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
20290 int16x4_t __c, const int __lane)
20292 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20295 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20296 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
20297 int32x2_t __c, const int __lane)
20299 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20302 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20303 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20304 uint16x4_t __c, const int __lane)
20306 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20309 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20310 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20311 uint32x2_t __c, const int __lane)
20313 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20316 /* vmls_laneq */
20318 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20319 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
20320 float32x4_t __c, const int __lane)
20322 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20325 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20326 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
20327 int16x8_t __c, const int __lane)
20329 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20332 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20333 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
20334 int32x4_t __c, const int __lane)
20336 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20339 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20340 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20341 uint16x8_t __c, const int __lane)
20343 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20346 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20347 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20348 uint32x4_t __c, const int __lane)
20350 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20353 /* vmlsq_lane */
20355 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20356 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
20357 float32x2_t __c, const int __lane)
20359 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20362 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20363 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
20364 int16x4_t __c, const int __lane)
20366 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20369 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20370 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
20371 int32x2_t __c, const int __lane)
20373 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20376 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20377 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20378 uint16x4_t __c, const int __lane)
20380 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20383 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20384 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20385 uint32x2_t __c, const int __lane)
20387 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20390 /* vmlsq_laneq */
20392 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20393 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20394 float32x4_t __c, const int __lane)
20396 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20399 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20400 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20401 int16x8_t __c, const int __lane)
20403 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20406 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20407 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20408 int32x4_t __c, const int __lane)
20410 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20412 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20413 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20414 uint16x8_t __c, const int __lane)
20416 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20419 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20420 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20421 uint32x4_t __c, const int __lane)
20423 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20426 /* vmov_n_ */
20428 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20429 vmov_n_f32 (float32_t __a)
20431 return vdup_n_f32 (__a);
20434 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20435 vmov_n_f64 (float64_t __a)
20437 return __a;
20440 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20441 vmov_n_p8 (poly8_t __a)
20443 return vdup_n_p8 (__a);
20446 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20447 vmov_n_p16 (poly16_t __a)
20449 return vdup_n_p16 (__a);
20452 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20453 vmov_n_s8 (int8_t __a)
20455 return vdup_n_s8 (__a);
20458 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20459 vmov_n_s16 (int16_t __a)
20461 return vdup_n_s16 (__a);
20464 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20465 vmov_n_s32 (int32_t __a)
20467 return vdup_n_s32 (__a);
20470 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20471 vmov_n_s64 (int64_t __a)
20473 return __a;
20476 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20477 vmov_n_u8 (uint8_t __a)
20479 return vdup_n_u8 (__a);
20482 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20483 vmov_n_u16 (uint16_t __a)
20485 return vdup_n_u16 (__a);
20488 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20489 vmov_n_u32 (uint32_t __a)
20491 return vdup_n_u32 (__a);
20494 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20495 vmov_n_u64 (uint64_t __a)
20497 return __a;
/* vmovq_n intrinsics: broadcast a scalar into every lane of a 128-bit
   vector.  Each is a synonym that forwards to the matching vdupq_n
   intrinsic.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmovq_n_f32 (float32_t __a)
{
  return vdupq_n_f32 (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmovq_n_f64 (float64_t __a)
{
  return vdupq_n_f64 (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmovq_n_p8 (poly8_t __a)
{
  return vdupq_n_p8 (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmovq_n_p16 (poly16_t __a)
{
  return vdupq_n_p16 (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovq_n_s8 (int8_t __a)
{
  return vdupq_n_s8 (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovq_n_s16 (int16_t __a)
{
  return vdupq_n_s16 (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovq_n_s32 (int32_t __a)
{
  return vdupq_n_s32 (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovq_n_s64 (int64_t __a)
{
  return vdupq_n_s64 (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovq_n_u8 (uint8_t __a)
{
  return vdupq_n_u8 (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovq_n_u16 (uint16_t __a)
{
  return vdupq_n_u16 (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovq_n_u32 (uint32_t __a)
{
  return vdupq_n_u32 (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovq_n_u64 (uint64_t __a)
{
  return vdupq_n_u64 (__a);
}
/* vmul_lane: multiply each lane of a 64-bit vector __a by lane __lane of
   the 64-bit vector __b.  Implemented as a plain vector-by-scalar multiply
   after extracting the lane with __aarch64_vget_lane_*.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
{
  /* float64x1_t is a scalar double in this header, so the only valid
     __lane is 0 and the lane index is not used.  */
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u32 (__b, __lane);
}
/* vmul_laneq: multiply each lane of a 64-bit vector __a by lane __lane of
   the 128-bit ("q") vector __b, extracted with __aarch64_vgetq_lane_*.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
/* vmulq_lane: multiply each lane of a 128-bit vector __a by lane __lane of
   the 64-bit vector __b.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
{
  /* float64x1_t is a scalar double, so __b is already the lane value and
     __lane (which must be 0) is unused; the multiply broadcasts __b.  */
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u32 (__b, __lane);
}
/* vmulq_laneq: multiply each lane of a 128-bit vector __a by lane __lane of
   the 128-bit vector __b.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
/* vneg: lane-wise negation, expressed as plain vector negation so the
   compiler selects the NEG/FNEG instructions.  Note the signed-integer
   variants wrap on the most negative value (use vqneg for saturation).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vneg_f32 (float32x2_t __a)
{
  return -__a;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vneg_f64 (float64x1_t __a)
{
  return -__a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vneg_s8 (int8x8_t __a)
{
  return -__a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vneg_s16 (int16x4_t __a)
{
  return -__a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vneg_s32 (int32x2_t __a)
{
  return -__a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vneg_s64 (int64x1_t __a)
{
  return -__a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vnegq_f32 (float32x4_t __a)
{
  return -__a;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vnegq_f64 (float64x2_t __a)
{
  return -__a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vnegq_s8 (int8x16_t __a)
{
  return -__a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vnegq_s16 (int16x8_t __a)
{
  return -__a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vnegq_s32 (int32x4_t __a)
{
  return -__a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vnegq_s64 (int64x2_t __a)
{
  return -__a;
}
/* vqabs: saturating absolute value (AArch64 SQABS), via the
   __builtin_aarch64_sqabs* builtins.  The scalar b/h/s/d variants operate
   on single elements in SIMD registers.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqabsq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqabsb_s8 (int8x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqabsh_s16 (int16x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqabshi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqabss_s32 (int32x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqabssi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqabsd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqabsdi (__a);
}
/* vqadd scalar forms: saturating addition of single elements
   (AArch64 SQADD/UQADD) via the sqadd/uqadd builtins.  The builtins are
   declared on signed types, hence the casts on the unsigned variants.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqaddb_s8 (int8x1_t __a, int8x1_t __b)
{
  return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqaddh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqadds_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqaddd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
}
/* vqdmlal: signed saturating doubling multiply-accumulate long
   (AArch64 SQDMLAL/SQDMLAL2), via the __builtin_aarch64_sqdmlal* builtins.
   The _high variants use the upper halves of the 128-bit sources; the
   _lane/_laneq variants multiply by a single lane; the _n variants by a
   scalar.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  /* Widen __c to 128 bits (upper half zero) because the lane builtin
     takes a 128-bit lane-source operand.  */
  int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  /* Widen __c to 128 bits (upper half zero) for the lane builtin.  */
  int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
{
  return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
{
  return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}
/* vqdmlsl: signed saturating doubling multiply-subtract long
   (AArch64 SQDMLSL/SQDMLSL2).  Structured exactly like the vqdmlal
   family above, but subtracting the doubled product.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  /* Widen __c to 128 bits (upper half zero) for the lane builtin.  */
  int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  /* Widen __c to 128 bits (upper half zero) for the lane builtin.  */
  int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}
/* vqdmulh: signed saturating doubling multiply returning high half
   (AArch64 SQDMULH), lane and scalar forms, via the sqdmulh builtins.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
/* vqdmull: signed saturating doubling multiply long
   (AArch64 SQDMULL/SQDMULL2), via the __builtin_aarch64_sqdmull* builtins.
   _high variants use the upper halves of 128-bit sources; _lane/_laneq
   multiply by a single lane; _n by a scalar.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  /* Widen __b to 128 bits (upper half zero) for the lane builtin.  */
  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  /* Widen __b to 128 bits (upper half zero) for the lane builtin.  */
  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
  return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}
/* vqmovn: saturating extract-narrow (AArch64 SQXTN/UQXTN), halving the
   element width.  The unsigned builtins are declared on signed vector
   types, hence the casts on the _u variants.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16x1_t __a)
{
  return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32x1_t __a)
{
  return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64x1_t __a)
{
  return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
}
/* vqmovun: saturating extract-unsigned-narrow (AArch64 SQXTUN) — signed
   input narrowed with unsigned saturation.  NOTE(review): the scalar
   b/h/s variants here return signed intN_x1_t even though the result is
   conceptually unsigned; this matches the builtin signatures as written.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
}
/* vqneg: saturating negation (AArch64 SQNEG) — unlike vneg, the most
   negative value saturates to the most positive instead of wrapping.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqnegd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqnegdi (__a);
}
/* vqrdmulh: signed saturating rounding doubling multiply returning high
   half (AArch64 SQRDMULH), lane and scalar forms.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
/* vqrshl */

/* Saturating rounding shift left: each lane of __a is shifted by the
   signed per-lane shift amount in __b (negative shifts right), with
   rounding and saturation.  Unsigned variants cast through the signed
   vector types the builtins expect; bit patterns are unchanged.  The
   b/h/s/d-suffixed forms operate on single scalar elements.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
}
/* vqrshrn */

/* Saturating rounding shift right narrow: shift each lane of __a right
   by the immediate __b, round, then narrow to half the element width
   with saturation.  The h/s/d-suffixed forms narrow a single scalar.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
{
  return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32x1_t __a, const int __b)
{
  return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
}
/* vqrshrun */

/* Saturating rounding shift right unsigned narrow: shift signed input
   right by immediate __b, round, and narrow to an unsigned type half
   the width, saturating negative results to zero.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

/* NOTE(review): the scalar variants below return signed types, while
   ACLE specifies unsigned results (uint8_t/uint16_t/uint32_t) for
   vqrshrun -- confirm against the ACLE specification.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}
/* vqshl */

/* Saturating shift left.  Register forms shift each lane of __a by the
   signed per-lane amount in __b (negative shifts right); the _n forms
   shift by the immediate __b.  Results saturate on overflow.  Unsigned
   variants cast through the signed vector types the builtins expect;
   bit patterns are unchanged.  b/h/s/d suffixes are scalar forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
}

/* Immediate-shift (_n) forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8x1_t __a, const int __b)
{
  return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16x1_t __a, const int __b)
{
  return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32x1_t __a, const int __b)
{
  return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
}
/* vqshlu */

/* Saturating shift left unsigned: shift signed input left by immediate
   __b and saturate to the unsigned range (negative inputs saturate to
   zero).  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshlu_n_s8 (int8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshlu_n_s16 (int16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshlu_n_s32 (int32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshlu_n_s64 (int64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshluq_n_s8 (int8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshluq_n_s16 (int16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshluq_n_s32 (int32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshluq_n_s64 (int64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
}

/* NOTE(review): the scalar variants below return signed types, while
   ACLE specifies unsigned results (uint8_t/uint16_t/uint32_t/uint64_t)
   for vqshlu -- confirm against the ACLE specification.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlub_n_s8 (int8x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshluh_n_s16 (int16x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshlus_n_s32 (int32x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshlud_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
}
/* vqshrn */

/* Saturating shift right narrow: shift each lane right by the immediate
   __b (truncating) and narrow to half the element width with
   saturation.  The h/s/d-suffixed forms narrow a single scalar.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshrnh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshrns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshrnh_n_u16 (uint16x1_t __a, const int __b)
{
  return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshrns_n_u32 (uint32x1_t __a, const int __b)
{
  return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
}
/* vqshrun */

/* Saturating shift right unsigned narrow: shift signed input right by
   immediate __b (truncating) and narrow to an unsigned type half the
   width, saturating negative results to zero.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}

/* NOTE(review): the scalar variants below return signed types, while
   ACLE specifies unsigned results for vqshrun -- confirm against the
   ACLE specification.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshrunh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshruns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}
/* vqsub */

/* Scalar saturating subtraction: __a - __b, clamped to the type's
   representable range instead of wrapping.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8x1_t __a, int8x1_t __b)
{
  return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
}
/* vrecpe */

/* Floating-point reciprocal estimate (FRECPE).  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpesf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpedf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frecpev2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frecpev4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frecpev2df (__a);
}
/* vrecps */

/* Floating-point reciprocal step (FRECPS), used in Newton-Raphson
   refinement of vrecpe results.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b)
{
  return __builtin_aarch64_frecpssf (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_aarch64_frecpsdf (__a, __b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_frecpsv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_frecpsv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_frecpsv2df (__a, __b);
}
/* vrecpx */

/* Floating-point reciprocal exponent (FRECPX).  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpxsf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpxdf (__a);
}
/* vrnd */

/* Round toward zero (truncate).  The float64x1_t variant goes through
   the generic __builtin_trunc on the single lane, since float64x1_t is
   a scalar double typedef here.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t __a)
{
  return __builtin_aarch64_btruncv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnd_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_btruncv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_btruncv2df (__a);
}
/* vrnda */

/* Round to nearest, ties away from zero.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t __a)
{
  return __builtin_aarch64_roundv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnda_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndaq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_roundv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndaq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_roundv2df (__a);
}
/* vrndi */

/* Round using the current rounding mode, without raising the inexact
   exception (nearbyint semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndi_f32 (float32x2_t __a)
{
  return __builtin_aarch64_nearbyintv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndi_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndiq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_nearbyintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndiq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_nearbyintv2df (__a);
}
/* vrndm */

/* Round toward minus infinity (floor).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t __a)
{
  return __builtin_aarch64_floorv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndm_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndmq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_floorv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndmq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_floorv2df (__a);
}
/* vrndn */

/* Round to nearest, ties to even (FRINTN).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frintnv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndn_f64 (float64x1_t __a)
{
  return __builtin_aarch64_frintndf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frintnv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndnq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frintnv2df (__a);
}
/* vrndp */

/* Round toward plus infinity (ceiling).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  return __builtin_aarch64_ceilv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndp_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_ceilv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndpq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_ceilv2df (__a);
}
/* vrndx */

/* Round using the current rounding mode, raising inexact when the
   result differs (rint semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  return __builtin_aarch64_rintv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndx_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_rintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndxq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_rintv2df (__a);
}
/* vrshl */

/* Rounding shift left (no saturation): each lane of __a is shifted by
   the signed per-lane amount in __b (negative shifts right, with
   rounding).  Unsigned variants cast through the signed vector types
   the builtins expect; bit patterns are unchanged.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
}
22616 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22617 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
22619 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
22622 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22623 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22625 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
22628 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22629 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22631 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
22634 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22635 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22637 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
22640 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22641 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22643 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
22646 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22647 vrshld_s64 (int64x1_t __a, int64x1_t __b)
22649 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22652 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22653 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22655 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
22658 /* vrshr */
22660 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22661 vrshr_n_s8 (int8x8_t __a, const int __b)
22663 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
22666 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22667 vrshr_n_s16 (int16x4_t __a, const int __b)
22669 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
22672 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22673 vrshr_n_s32 (int32x2_t __a, const int __b)
22675 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
22678 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22679 vrshr_n_s64 (int64x1_t __a, const int __b)
22681 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22684 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22685 vrshr_n_u8 (uint8x8_t __a, const int __b)
22687 return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
22690 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22691 vrshr_n_u16 (uint16x4_t __a, const int __b)
22693 return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
22696 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22697 vrshr_n_u32 (uint32x2_t __a, const int __b)
22699 return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
22702 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22703 vrshr_n_u64 (uint64x1_t __a, const int __b)
22705 return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
22708 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22709 vrshrq_n_s8 (int8x16_t __a, const int __b)
22711 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
22714 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22715 vrshrq_n_s16 (int16x8_t __a, const int __b)
22717 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
22720 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22721 vrshrq_n_s32 (int32x4_t __a, const int __b)
22723 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
22726 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22727 vrshrq_n_s64 (int64x2_t __a, const int __b)
22729 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
22732 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22733 vrshrq_n_u8 (uint8x16_t __a, const int __b)
22735 return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
22738 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22739 vrshrq_n_u16 (uint16x8_t __a, const int __b)
22741 return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
22744 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22745 vrshrq_n_u32 (uint32x4_t __a, const int __b)
22747 return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
22750 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22751 vrshrq_n_u64 (uint64x2_t __a, const int __b)
22753 return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
22756 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22757 vrshrd_n_s64 (int64x1_t __a, const int __b)
22759 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22762 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22763 vrshrd_n_u64 (uint64x1_t __a, const int __b)
22765 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
22768 /* vrsra */
22770 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22771 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22773 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
22776 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22777 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22779 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
22782 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22783 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22785 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
22788 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22789 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22791 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22794 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22795 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22797 return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
22798 (int8x8_t) __b, __c);
22801 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22802 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22804 return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
22805 (int16x4_t) __b, __c);
22808 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22809 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22811 return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
22812 (int32x2_t) __b, __c);
22815 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22816 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22818 return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
22819 (int64x1_t) __b, __c);
22822 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22823 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22825 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
22828 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22829 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22831 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
22834 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22835 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22837 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
22840 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22841 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22843 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
22846 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22847 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22849 return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
22850 (int8x16_t) __b, __c);
22853 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22854 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22856 return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
22857 (int16x8_t) __b, __c);
22860 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22861 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22863 return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
22864 (int32x4_t) __b, __c);
22867 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22868 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22870 return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
22871 (int64x2_t) __b, __c);
22874 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22875 vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22877 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22880 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22881 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22883 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vsha1: SHA-1 acceleration intrinsics (Crypto extension only).  */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

/* SHA-256 acceleration intrinsics.  */

static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

/* 64x64 -> 128-bit carry-less (polynomial) multiply.  */

static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
22963 /* vshl */
22965 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22966 vshl_n_s8 (int8x8_t __a, const int __b)
22968 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
22971 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22972 vshl_n_s16 (int16x4_t __a, const int __b)
22974 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
22977 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22978 vshl_n_s32 (int32x2_t __a, const int __b)
22980 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
22983 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22984 vshl_n_s64 (int64x1_t __a, const int __b)
22986 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
22989 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22990 vshl_n_u8 (uint8x8_t __a, const int __b)
22992 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
22995 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22996 vshl_n_u16 (uint16x4_t __a, const int __b)
22998 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
23001 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23002 vshl_n_u32 (uint32x2_t __a, const int __b)
23004 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
23007 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23008 vshl_n_u64 (uint64x1_t __a, const int __b)
23010 return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
23013 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23014 vshlq_n_s8 (int8x16_t __a, const int __b)
23016 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
23019 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23020 vshlq_n_s16 (int16x8_t __a, const int __b)
23022 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
23025 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23026 vshlq_n_s32 (int32x4_t __a, const int __b)
23028 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
23031 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23032 vshlq_n_s64 (int64x2_t __a, const int __b)
23034 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
23037 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23038 vshlq_n_u8 (uint8x16_t __a, const int __b)
23040 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
23043 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23044 vshlq_n_u16 (uint16x8_t __a, const int __b)
23046 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
23049 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23050 vshlq_n_u32 (uint32x4_t __a, const int __b)
23052 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
23055 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23056 vshlq_n_u64 (uint64x2_t __a, const int __b)
23058 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
23061 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23062 vshld_n_s64 (int64x1_t __a, const int __b)
23064 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23067 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23068 vshld_n_u64 (uint64x1_t __a, const int __b)
23070 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
23073 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23074 vshl_s8 (int8x8_t __a, int8x8_t __b)
23076 return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23079 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23080 vshl_s16 (int16x4_t __a, int16x4_t __b)
23082 return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23085 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23086 vshl_s32 (int32x2_t __a, int32x2_t __b)
23088 return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23091 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23092 vshl_s64 (int64x1_t __a, int64x1_t __b)
23094 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23097 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23098 vshl_u8 (uint8x8_t __a, int8x8_t __b)
23100 return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23103 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23104 vshl_u16 (uint16x4_t __a, int16x4_t __b)
23106 return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23109 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23110 vshl_u32 (uint32x2_t __a, int32x2_t __b)
23112 return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23115 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23116 vshl_u64 (uint64x1_t __a, int64x1_t __b)
23118 return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23121 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23122 vshlq_s8 (int8x16_t __a, int8x16_t __b)
23124 return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23127 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23128 vshlq_s16 (int16x8_t __a, int16x8_t __b)
23130 return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23133 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23134 vshlq_s32 (int32x4_t __a, int32x4_t __b)
23136 return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23139 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23140 vshlq_s64 (int64x2_t __a, int64x2_t __b)
23142 return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23145 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23146 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23148 return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23151 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23152 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23154 return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23157 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23158 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23160 return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23163 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23164 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23166 return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23169 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23170 vshld_s64 (int64x1_t __a, int64x1_t __b)
23172 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23175 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23176 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23178 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23181 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23182 vshll_high_n_s8 (int8x16_t __a, const int __b)
23184 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23187 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23188 vshll_high_n_s16 (int16x8_t __a, const int __b)
23190 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23193 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23194 vshll_high_n_s32 (int32x4_t __a, const int __b)
23196 return __builtin_aarch64_sshll2_nv4si (__a, __b);
23199 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23200 vshll_high_n_u8 (uint8x16_t __a, const int __b)
23202 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23205 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23206 vshll_high_n_u16 (uint16x8_t __a, const int __b)
23208 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23211 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23212 vshll_high_n_u32 (uint32x4_t __a, const int __b)
23214 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23217 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23218 vshll_n_s8 (int8x8_t __a, const int __b)
23220 return __builtin_aarch64_sshll_nv8qi (__a, __b);
23223 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23224 vshll_n_s16 (int16x4_t __a, const int __b)
23226 return __builtin_aarch64_sshll_nv4hi (__a, __b);
23229 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23230 vshll_n_s32 (int32x2_t __a, const int __b)
23232 return __builtin_aarch64_sshll_nv2si (__a, __b);
23235 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23236 vshll_n_u8 (uint8x8_t __a, const int __b)
23238 return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23241 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23242 vshll_n_u16 (uint16x4_t __a, const int __b)
23244 return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23247 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23248 vshll_n_u32 (uint32x2_t __a, const int __b)
23250 return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
23253 /* vshr */
23255 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23256 vshr_n_s8 (int8x8_t __a, const int __b)
23258 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
23261 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23262 vshr_n_s16 (int16x4_t __a, const int __b)
23264 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
23267 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23268 vshr_n_s32 (int32x2_t __a, const int __b)
23270 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
23273 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23274 vshr_n_s64 (int64x1_t __a, const int __b)
23276 return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
23279 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23280 vshr_n_u8 (uint8x8_t __a, const int __b)
23282 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
23285 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23286 vshr_n_u16 (uint16x4_t __a, const int __b)
23288 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
23291 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23292 vshr_n_u32 (uint32x2_t __a, const int __b)
23294 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
23297 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23298 vshr_n_u64 (uint64x1_t __a, const int __b)
23300 return __builtin_aarch64_lshr_simddi_uus ( __a, __b);
23303 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23304 vshrq_n_s8 (int8x16_t __a, const int __b)
23306 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
23309 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23310 vshrq_n_s16 (int16x8_t __a, const int __b)
23312 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
23315 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23316 vshrq_n_s32 (int32x4_t __a, const int __b)
23318 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
23321 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23322 vshrq_n_s64 (int64x2_t __a, const int __b)
23324 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
23327 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23328 vshrq_n_u8 (uint8x16_t __a, const int __b)
23330 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
23333 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23334 vshrq_n_u16 (uint16x8_t __a, const int __b)
23336 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
23339 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23340 vshrq_n_u32 (uint32x4_t __a, const int __b)
23342 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
23345 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23346 vshrq_n_u64 (uint64x2_t __a, const int __b)
23348 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
23351 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23352 vshrd_n_s64 (int64x1_t __a, const int __b)
23354 return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
23357 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
23358 vshrd_n_u64 (uint64_t __a, const int __b)
23360 return __builtin_aarch64_lshr_simddi_uus (__a, __b);
23363 /* vsli */
23365 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23366 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23368 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
23371 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23372 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23374 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
23377 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23378 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23380 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
23383 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23384 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23386 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23389 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23390 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23392 return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
23393 (int8x8_t) __b, __c);
23396 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23397 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23399 return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
23400 (int16x4_t) __b, __c);
23403 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23404 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23406 return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
23407 (int32x2_t) __b, __c);
23410 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23411 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23413 return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
23414 (int64x1_t) __b, __c);
23417 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23418 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23420 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
23423 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23424 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23426 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
23429 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23430 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23432 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
23435 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23436 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23438 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
23441 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23442 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23444 return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
23445 (int8x16_t) __b, __c);
23448 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23449 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23451 return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
23452 (int16x8_t) __b, __c);
23455 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23456 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23458 return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
23459 (int32x4_t) __b, __c);
23462 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23463 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23465 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23466 (int64x2_t) __b, __c);
23469 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23470 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23472 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23475 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23476 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23478 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23481 /* vsqadd */
23483 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23484 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23486 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23487 (int8x8_t) __b);
23490 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23491 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23493 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23494 (int16x4_t) __b);
23497 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23498 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23500 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23501 (int32x2_t) __b);
23504 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23505 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23507 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23510 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23511 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
23513 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
23514 (int8x16_t) __b);
23517 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23518 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
23520 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
23521 (int16x8_t) __b);
23524 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23525 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
23527 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
23528 (int32x4_t) __b);
23531 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23532 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
23534 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
23535 (int64x2_t) __b);
23538 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
23539 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
23541 return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
23544 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
23545 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
23547 return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
23550 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
23551 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
23553 return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
23556 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23557 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
23559 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23562 /* vsqrt */
23563 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23564 vsqrt_f32 (float32x2_t a)
23566 return __builtin_aarch64_sqrtv2sf (a);
23569 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23570 vsqrtq_f32 (float32x4_t a)
23572 return __builtin_aarch64_sqrtv4sf (a);
23575 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23576 vsqrtq_f64 (float64x2_t a)
23578 return __builtin_aarch64_sqrtv2df (a);
23581 /* vsra */
23583 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23584 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23586 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
23589 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23590 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23592 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
23595 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23596 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23598 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
23601 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23602 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23604 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23607 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23608 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23610 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
23611 (int8x8_t) __b, __c);
23614 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23615 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23617 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
23618 (int16x4_t) __b, __c);
23621 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23622 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23624 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
23625 (int32x2_t) __b, __c);
23628 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23629 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23631 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
23632 (int64x1_t) __b, __c);
23635 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23636 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23638 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
23641 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23642 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23644 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
23647 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23648 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23650 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
23653 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23654 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23656 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
23659 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23660 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23662 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
23663 (int8x16_t) __b, __c);
23666 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23667 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23669 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
23670 (int16x8_t) __b, __c);
23673 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23674 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23676 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
23677 (int32x4_t) __b, __c);
23680 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23681 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23683 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
23684 (int64x2_t) __b, __c);
23687 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23688 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23690 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23693 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23694 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23696 return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
23699 /* vsri */
23701 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23702 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23704 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
23707 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23708 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23710 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
23713 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23714 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23716 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
23719 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23720 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23722 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
23725 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23726 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23728 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
23729 (int8x8_t) __b, __c);
23732 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23733 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23735 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
23736 (int16x4_t) __b, __c);
23739 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23740 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23742 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
23743 (int32x2_t) __b, __c);
23746 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23747 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23749 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
23750 (int64x1_t) __b, __c);
23753 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23754 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23756 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
23759 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23760 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23762 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
23765 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23766 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23768 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
23771 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23772 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23774 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
23777 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23778 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23780 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
23781 (int8x16_t) __b, __c);
23784 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23785 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23787 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
23788 (int16x8_t) __b, __c);
23791 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23792 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23794 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
23795 (int32x4_t) __b, __c);
23798 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23799 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23801 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
23802 (int64x2_t) __b, __c);
23805 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23806 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23808 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
23811 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23812 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23814 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
23817 /* vst1 */
23819 __extension__ static __inline void __attribute__ ((__always_inline__))
23820 vst1_f32 (float32_t *a, float32x2_t b)
23822 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
23825 __extension__ static __inline void __attribute__ ((__always_inline__))
23826 vst1_f64 (float64_t *a, float64x1_t b)
23828 *a = b;
23831 __extension__ static __inline void __attribute__ ((__always_inline__))
23832 vst1_p8 (poly8_t *a, poly8x8_t b)
23834 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23835 (int8x8_t) b);
23838 __extension__ static __inline void __attribute__ ((__always_inline__))
23839 vst1_p16 (poly16_t *a, poly16x4_t b)
23841 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23842 (int16x4_t) b);
23845 __extension__ static __inline void __attribute__ ((__always_inline__))
23846 vst1_s8 (int8_t *a, int8x8_t b)
23848 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
23851 __extension__ static __inline void __attribute__ ((__always_inline__))
23852 vst1_s16 (int16_t *a, int16x4_t b)
23854 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
23857 __extension__ static __inline void __attribute__ ((__always_inline__))
23858 vst1_s32 (int32_t *a, int32x2_t b)
23860 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
23863 __extension__ static __inline void __attribute__ ((__always_inline__))
23864 vst1_s64 (int64_t *a, int64x1_t b)
23866 *a = b;
23869 __extension__ static __inline void __attribute__ ((__always_inline__))
23870 vst1_u8 (uint8_t *a, uint8x8_t b)
23872 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23873 (int8x8_t) b);
23876 __extension__ static __inline void __attribute__ ((__always_inline__))
23877 vst1_u16 (uint16_t *a, uint16x4_t b)
23879 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23880 (int16x4_t) b);
23883 __extension__ static __inline void __attribute__ ((__always_inline__))
23884 vst1_u32 (uint32_t *a, uint32x2_t b)
23886 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
23887 (int32x2_t) b);
23890 __extension__ static __inline void __attribute__ ((__always_inline__))
23891 vst1_u64 (uint64_t *a, uint64x1_t b)
23893 *a = b;
23896 __extension__ static __inline void __attribute__ ((__always_inline__))
23897 vst1q_f32 (float32_t *a, float32x4_t b)
23899 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
23902 __extension__ static __inline void __attribute__ ((__always_inline__))
23903 vst1q_f64 (float64_t *a, float64x2_t b)
23905 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
23908 /* vst1q */
23910 __extension__ static __inline void __attribute__ ((__always_inline__))
23911 vst1q_p8 (poly8_t *a, poly8x16_t b)
23913 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23914 (int8x16_t) b);
23917 __extension__ static __inline void __attribute__ ((__always_inline__))
23918 vst1q_p16 (poly16_t *a, poly16x8_t b)
23920 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23921 (int16x8_t) b);
23924 __extension__ static __inline void __attribute__ ((__always_inline__))
23925 vst1q_s8 (int8_t *a, int8x16_t b)
23927 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
23930 __extension__ static __inline void __attribute__ ((__always_inline__))
23931 vst1q_s16 (int16_t *a, int16x8_t b)
23933 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
23936 __extension__ static __inline void __attribute__ ((__always_inline__))
23937 vst1q_s32 (int32_t *a, int32x4_t b)
23939 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
23942 __extension__ static __inline void __attribute__ ((__always_inline__))
23943 vst1q_s64 (int64_t *a, int64x2_t b)
23945 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
23948 __extension__ static __inline void __attribute__ ((__always_inline__))
23949 vst1q_u8 (uint8_t *a, uint8x16_t b)
23951 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23952 (int8x16_t) b);
23955 __extension__ static __inline void __attribute__ ((__always_inline__))
23956 vst1q_u16 (uint16_t *a, uint16x8_t b)
23958 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23959 (int16x8_t) b);
23962 __extension__ static __inline void __attribute__ ((__always_inline__))
23963 vst1q_u32 (uint32_t *a, uint32x4_t b)
23965 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
23966 (int32x4_t) b);
23969 __extension__ static __inline void __attribute__ ((__always_inline__))
23970 vst1q_u64 (uint64_t *a, uint64x2_t b)
23972 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
23973 (int64x2_t) b);
23976 /* vstn */
23978 __extension__ static __inline void
23979 vst2_s64 (int64_t * __a, int64x1x2_t val)
23981 __builtin_aarch64_simd_oi __o;
23982 int64x2x2_t temp;
23983 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23984 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23985 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23986 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23987 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
23990 __extension__ static __inline void
23991 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
23993 __builtin_aarch64_simd_oi __o;
23994 uint64x2x2_t temp;
23995 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23996 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23997 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23998 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23999 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24002 __extension__ static __inline void
24003 vst2_f64 (float64_t * __a, float64x1x2_t val)
24005 __builtin_aarch64_simd_oi __o;
24006 float64x2x2_t temp;
24007 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24008 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24009 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24010 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24011 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24014 __extension__ static __inline void
24015 vst2_s8 (int8_t * __a, int8x8x2_t val)
24017 __builtin_aarch64_simd_oi __o;
24018 int8x16x2_t temp;
24019 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24020 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24021 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24022 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24023 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24026 __extension__ static __inline void __attribute__ ((__always_inline__))
24027 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24029 __builtin_aarch64_simd_oi __o;
24030 poly8x16x2_t temp;
24031 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24032 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24033 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24034 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24035 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24038 __extension__ static __inline void __attribute__ ((__always_inline__))
24039 vst2_s16 (int16_t * __a, int16x4x2_t val)
24041 __builtin_aarch64_simd_oi __o;
24042 int16x8x2_t temp;
24043 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24044 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24045 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24046 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24047 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24050 __extension__ static __inline void __attribute__ ((__always_inline__))
24051 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24053 __builtin_aarch64_simd_oi __o;
24054 poly16x8x2_t temp;
24055 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24056 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24057 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24058 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24059 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24062 __extension__ static __inline void __attribute__ ((__always_inline__))
24063 vst2_s32 (int32_t * __a, int32x2x2_t val)
24065 __builtin_aarch64_simd_oi __o;
24066 int32x4x2_t temp;
24067 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24068 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24069 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24070 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24071 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24074 __extension__ static __inline void __attribute__ ((__always_inline__))
24075 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24077 __builtin_aarch64_simd_oi __o;
24078 uint8x16x2_t temp;
24079 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24080 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24081 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24082 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24083 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24086 __extension__ static __inline void __attribute__ ((__always_inline__))
24087 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24089 __builtin_aarch64_simd_oi __o;
24090 uint16x8x2_t temp;
24091 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24092 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24093 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24094 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24095 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24098 __extension__ static __inline void __attribute__ ((__always_inline__))
24099 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24101 __builtin_aarch64_simd_oi __o;
24102 uint32x4x2_t temp;
24103 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24104 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24105 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24106 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24107 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24110 __extension__ static __inline void __attribute__ ((__always_inline__))
24111 vst2_f32 (float32_t * __a, float32x2x2_t val)
24113 __builtin_aarch64_simd_oi __o;
24114 float32x4x2_t temp;
24115 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24116 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24117 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24118 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24119 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24122 __extension__ static __inline void __attribute__ ((__always_inline__))
24123 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24125 __builtin_aarch64_simd_oi __o;
24126 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24127 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24128 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24131 __extension__ static __inline void __attribute__ ((__always_inline__))
24132 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24134 __builtin_aarch64_simd_oi __o;
24135 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24136 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24137 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24140 __extension__ static __inline void __attribute__ ((__always_inline__))
24141 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24143 __builtin_aarch64_simd_oi __o;
24144 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24145 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24146 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24149 __extension__ static __inline void __attribute__ ((__always_inline__))
24150 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24152 __builtin_aarch64_simd_oi __o;
24153 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24154 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24155 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24158 __extension__ static __inline void __attribute__ ((__always_inline__))
24159 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24161 __builtin_aarch64_simd_oi __o;
24162 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24163 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24164 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24167 __extension__ static __inline void __attribute__ ((__always_inline__))
24168 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24170 __builtin_aarch64_simd_oi __o;
24171 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24172 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24173 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24176 __extension__ static __inline void __attribute__ ((__always_inline__))
24177 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24179 __builtin_aarch64_simd_oi __o;
24180 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24181 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24182 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24185 __extension__ static __inline void __attribute__ ((__always_inline__))
24186 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24188 __builtin_aarch64_simd_oi __o;
24189 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24190 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24191 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24194 __extension__ static __inline void __attribute__ ((__always_inline__))
24195 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24197 __builtin_aarch64_simd_oi __o;
24198 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24199 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24200 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24203 __extension__ static __inline void __attribute__ ((__always_inline__))
24204 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24206 __builtin_aarch64_simd_oi __o;
24207 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24208 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24209 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24212 __extension__ static __inline void __attribute__ ((__always_inline__))
24213 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24215 __builtin_aarch64_simd_oi __o;
24216 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24217 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24218 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24221 __extension__ static __inline void __attribute__ ((__always_inline__))
24222 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24224 __builtin_aarch64_simd_oi __o;
24225 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24226 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24227 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24230 __extension__ static __inline void
24231 vst3_s64 (int64_t * __a, int64x1x3_t val)
24233 __builtin_aarch64_simd_ci __o;
24234 int64x2x3_t temp;
24235 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24236 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24237 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24238 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24239 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24240 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24241 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24244 __extension__ static __inline void
24245 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24247 __builtin_aarch64_simd_ci __o;
24248 uint64x2x3_t temp;
24249 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24250 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24251 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24252 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24253 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24254 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24255 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24258 __extension__ static __inline void
24259 vst3_f64 (float64_t * __a, float64x1x3_t val)
24261 __builtin_aarch64_simd_ci __o;
24262 float64x2x3_t temp;
24263 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24264 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24265 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24266 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24267 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24268 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24269 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24272 __extension__ static __inline void
24273 vst3_s8 (int8_t * __a, int8x8x3_t val)
24275 __builtin_aarch64_simd_ci __o;
24276 int8x16x3_t temp;
24277 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24278 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24279 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24280 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24281 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24282 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24283 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24286 __extension__ static __inline void __attribute__ ((__always_inline__))
24287 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24289 __builtin_aarch64_simd_ci __o;
24290 poly8x16x3_t temp;
24291 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24292 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24293 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24294 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24295 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24296 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24297 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24300 __extension__ static __inline void __attribute__ ((__always_inline__))
24301 vst3_s16 (int16_t * __a, int16x4x3_t val)
24303 __builtin_aarch64_simd_ci __o;
24304 int16x8x3_t temp;
24305 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24306 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24307 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24308 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24309 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24310 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24311 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24314 __extension__ static __inline void __attribute__ ((__always_inline__))
24315 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24317 __builtin_aarch64_simd_ci __o;
24318 poly16x8x3_t temp;
24319 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24320 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24321 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24322 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24323 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24324 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24325 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24328 __extension__ static __inline void __attribute__ ((__always_inline__))
24329 vst3_s32 (int32_t * __a, int32x2x3_t val)
24331 __builtin_aarch64_simd_ci __o;
24332 int32x4x3_t temp;
24333 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24334 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24335 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24336 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24337 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24338 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24339 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24342 __extension__ static __inline void __attribute__ ((__always_inline__))
24343 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24345 __builtin_aarch64_simd_ci __o;
24346 uint8x16x3_t temp;
24347 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24348 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24349 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24350 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24351 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24352 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24353 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24356 __extension__ static __inline void __attribute__ ((__always_inline__))
24357 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24359 __builtin_aarch64_simd_ci __o;
24360 uint16x8x3_t temp;
24361 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24362 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24363 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24364 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24365 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24366 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24367 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24370 __extension__ static __inline void __attribute__ ((__always_inline__))
24371 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24373 __builtin_aarch64_simd_ci __o;
24374 uint32x4x3_t temp;
24375 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24376 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24377 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24378 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24379 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24380 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24381 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24384 __extension__ static __inline void __attribute__ ((__always_inline__))
24385 vst3_f32 (float32_t * __a, float32x2x3_t val)
24387 __builtin_aarch64_simd_ci __o;
24388 float32x4x3_t temp;
24389 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24390 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24391 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24392 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24393 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24394 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24395 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24398 __extension__ static __inline void __attribute__ ((__always_inline__))
24399 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24401 __builtin_aarch64_simd_ci __o;
24402 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24403 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24404 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24405 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24408 __extension__ static __inline void __attribute__ ((__always_inline__))
24409 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24411 __builtin_aarch64_simd_ci __o;
24412 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24413 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24414 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24415 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24418 __extension__ static __inline void __attribute__ ((__always_inline__))
24419 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24421 __builtin_aarch64_simd_ci __o;
24422 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24423 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24424 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24425 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24428 __extension__ static __inline void __attribute__ ((__always_inline__))
24429 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24431 __builtin_aarch64_simd_ci __o;
24432 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24433 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24434 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24435 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24438 __extension__ static __inline void __attribute__ ((__always_inline__))
24439 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24441 __builtin_aarch64_simd_ci __o;
24442 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24443 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24444 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24445 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24448 __extension__ static __inline void __attribute__ ((__always_inline__))
24449 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24451 __builtin_aarch64_simd_ci __o;
24452 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24453 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24454 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24455 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24458 __extension__ static __inline void __attribute__ ((__always_inline__))
24459 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24461 __builtin_aarch64_simd_ci __o;
24462 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24463 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24464 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24465 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24468 __extension__ static __inline void __attribute__ ((__always_inline__))
24469 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24471 __builtin_aarch64_simd_ci __o;
24472 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24473 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24474 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24475 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24478 __extension__ static __inline void __attribute__ ((__always_inline__))
24479 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24481 __builtin_aarch64_simd_ci __o;
24482 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24483 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24484 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24485 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24488 __extension__ static __inline void __attribute__ ((__always_inline__))
24489 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24491 __builtin_aarch64_simd_ci __o;
24492 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24493 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24494 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24495 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24498 __extension__ static __inline void __attribute__ ((__always_inline__))
24499 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24501 __builtin_aarch64_simd_ci __o;
24502 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24503 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24504 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24505 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24508 __extension__ static __inline void __attribute__ ((__always_inline__))
24509 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24511 __builtin_aarch64_simd_ci __o;
24512 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24513 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24514 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24515 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24518 __extension__ static __inline void
24519 vst4_s64 (int64_t * __a, int64x1x4_t val)
24521 __builtin_aarch64_simd_xi __o;
24522 int64x2x4_t temp;
24523 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24524 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24525 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24526 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
24527 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24528 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24529 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24530 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24531 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24534 __extension__ static __inline void
24535 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24537 __builtin_aarch64_simd_xi __o;
24538 uint64x2x4_t temp;
24539 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24540 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24541 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24542 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
24543 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24544 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24545 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24546 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24547 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24550 __extension__ static __inline void
24551 vst4_f64 (float64_t * __a, float64x1x4_t val)
24553 __builtin_aarch64_simd_xi __o;
24554 float64x2x4_t temp;
24555 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24556 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24557 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24558 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
24559 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24560 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24561 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24562 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24563 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24566 __extension__ static __inline void
24567 vst4_s8 (int8_t * __a, int8x8x4_t val)
24569 __builtin_aarch64_simd_xi __o;
24570 int8x16x4_t temp;
24571 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24572 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24573 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24574 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
24575 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24576 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24577 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24578 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24579 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24582 __extension__ static __inline void __attribute__ ((__always_inline__))
24583 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24585 __builtin_aarch64_simd_xi __o;
24586 poly8x16x4_t temp;
24587 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24588 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24589 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24590 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
24591 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24592 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24593 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24594 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24595 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24598 __extension__ static __inline void __attribute__ ((__always_inline__))
24599 vst4_s16 (int16_t * __a, int16x4x4_t val)
24601 __builtin_aarch64_simd_xi __o;
24602 int16x8x4_t temp;
24603 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24604 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24605 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24606 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
24607 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24608 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24609 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24610 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24611 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24614 __extension__ static __inline void __attribute__ ((__always_inline__))
24615 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
24617 __builtin_aarch64_simd_xi __o;
24618 poly16x8x4_t temp;
24619 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24620 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24621 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24622 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
24623 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24624 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24625 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24626 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24627 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24630 __extension__ static __inline void __attribute__ ((__always_inline__))
24631 vst4_s32 (int32_t * __a, int32x2x4_t val)
24633 __builtin_aarch64_simd_xi __o;
24634 int32x4x4_t temp;
24635 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24636 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24637 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24638 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
24639 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24640 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24641 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24642 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24643 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24646 __extension__ static __inline void __attribute__ ((__always_inline__))
24647 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
24649 __builtin_aarch64_simd_xi __o;
24650 uint8x16x4_t temp;
24651 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24652 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24653 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24654 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
24655 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24656 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24657 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24658 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24659 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24662 __extension__ static __inline void __attribute__ ((__always_inline__))
24663 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
24665 __builtin_aarch64_simd_xi __o;
24666 uint16x8x4_t temp;
24667 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24668 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24669 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24670 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
24671 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24672 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24673 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24674 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24675 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24678 __extension__ static __inline void __attribute__ ((__always_inline__))
24679 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
24681 __builtin_aarch64_simd_xi __o;
24682 uint32x4x4_t temp;
24683 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24684 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24685 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24686 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
24687 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24688 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24689 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24690 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24691 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24694 __extension__ static __inline void __attribute__ ((__always_inline__))
24695 vst4_f32 (float32_t * __a, float32x2x4_t val)
24697 __builtin_aarch64_simd_xi __o;
24698 float32x4x4_t temp;
24699 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24700 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24701 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24702 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
24703 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
24704 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
24705 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
24706 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
24707 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24710 __extension__ static __inline void __attribute__ ((__always_inline__))
24711 vst4q_s8 (int8_t * __a, int8x16x4_t val)
24713 __builtin_aarch64_simd_xi __o;
24714 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24715 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24716 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24717 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24718 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24721 __extension__ static __inline void __attribute__ ((__always_inline__))
24722 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
24724 __builtin_aarch64_simd_xi __o;
24725 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24726 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24727 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24728 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24729 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24732 __extension__ static __inline void __attribute__ ((__always_inline__))
24733 vst4q_s16 (int16_t * __a, int16x8x4_t val)
24735 __builtin_aarch64_simd_xi __o;
24736 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24737 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24738 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24739 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24740 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24743 __extension__ static __inline void __attribute__ ((__always_inline__))
24744 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
24746 __builtin_aarch64_simd_xi __o;
24747 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24748 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24749 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24750 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24751 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24754 __extension__ static __inline void __attribute__ ((__always_inline__))
24755 vst4q_s32 (int32_t * __a, int32x4x4_t val)
24757 __builtin_aarch64_simd_xi __o;
24758 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24759 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24760 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24761 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24762 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24765 __extension__ static __inline void __attribute__ ((__always_inline__))
24766 vst4q_s64 (int64_t * __a, int64x2x4_t val)
24768 __builtin_aarch64_simd_xi __o;
24769 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24770 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24771 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24772 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24773 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24776 __extension__ static __inline void __attribute__ ((__always_inline__))
24777 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
24779 __builtin_aarch64_simd_xi __o;
24780 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24781 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24782 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24783 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24784 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24787 __extension__ static __inline void __attribute__ ((__always_inline__))
24788 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
24790 __builtin_aarch64_simd_xi __o;
24791 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24792 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24793 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24794 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24795 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24798 __extension__ static __inline void __attribute__ ((__always_inline__))
24799 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
24801 __builtin_aarch64_simd_xi __o;
24802 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24803 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24804 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24805 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24806 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24809 __extension__ static __inline void __attribute__ ((__always_inline__))
24810 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
24812 __builtin_aarch64_simd_xi __o;
24813 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24814 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24815 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24816 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24817 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24820 __extension__ static __inline void __attribute__ ((__always_inline__))
24821 vst4q_f32 (float32_t * __a, float32x4x4_t val)
24823 __builtin_aarch64_simd_xi __o;
24824 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
24825 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
24826 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
24827 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
24828 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24831 __extension__ static __inline void __attribute__ ((__always_inline__))
24832 vst4q_f64 (float64_t * __a, float64x2x4_t val)
24834 __builtin_aarch64_simd_xi __o;
24835 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
24836 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
24837 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
24838 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
24839 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
24842 /* vsub */
24844 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24845 vsubd_s64 (int64x1_t __a, int64x1_t __b)
24847 return __a - __b;
24850 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24851 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
24853 return __a - __b;
24856 /* vtbx1 */
24858 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24859 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
24861 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24862 vmov_n_u8 (8));
24863 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
24865 return vbsl_s8 (__mask, __tbl, __r);
24868 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24869 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
24871 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24872 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
24874 return vbsl_u8 (__mask, __tbl, __r);
24877 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24878 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
24880 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24881 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
24883 return vbsl_p8 (__mask, __tbl, __r);
24886 /* vtbx3 */
24888 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24889 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
24891 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24892 vmov_n_u8 (24));
24893 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
24895 return vbsl_s8 (__mask, __tbl, __r);
24898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24899 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
24901 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24902 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
24904 return vbsl_u8 (__mask, __tbl, __r);
24907 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24908 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
24910 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24911 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
24913 return vbsl_p8 (__mask, __tbl, __r);
24916 /* vtrn */
24918 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
24919 vtrn_f32 (float32x2_t a, float32x2_t b)
24921 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
24924 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
24925 vtrn_p8 (poly8x8_t a, poly8x8_t b)
24927 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
24930 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
24931 vtrn_p16 (poly16x4_t a, poly16x4_t b)
24933 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
24936 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
24937 vtrn_s8 (int8x8_t a, int8x8_t b)
24939 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
24942 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
24943 vtrn_s16 (int16x4_t a, int16x4_t b)
24945 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
24948 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
24949 vtrn_s32 (int32x2_t a, int32x2_t b)
24951 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
24954 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
24955 vtrn_u8 (uint8x8_t a, uint8x8_t b)
24957 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
24960 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
24961 vtrn_u16 (uint16x4_t a, uint16x4_t b)
24963 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
24966 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
24967 vtrn_u32 (uint32x2_t a, uint32x2_t b)
24969 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
24972 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
24973 vtrnq_f32 (float32x4_t a, float32x4_t b)
24975 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
24978 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
24979 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
24981 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
24984 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
24985 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
24987 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
24990 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
24991 vtrnq_s8 (int8x16_t a, int8x16_t b)
24993 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
24996 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
24997 vtrnq_s16 (int16x8_t a, int16x8_t b)
24999 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25002 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25003 vtrnq_s32 (int32x4_t a, int32x4_t b)
25005 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25008 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25009 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25011 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25014 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25015 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25017 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25020 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25021 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25023 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25026 /* vtst */
25028 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25029 vtst_s8 (int8x8_t __a, int8x8_t __b)
25031 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25034 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25035 vtst_s16 (int16x4_t __a, int16x4_t __b)
25037 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25040 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25041 vtst_s32 (int32x2_t __a, int32x2_t __b)
25043 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25046 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25047 vtst_s64 (int64x1_t __a, int64x1_t __b)
25049 return (__a & __b) ? -1ll : 0ll;
25052 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25053 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25055 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25056 (int8x8_t) __b);
25059 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25060 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25062 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25063 (int16x4_t) __b);
25066 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25067 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25069 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25070 (int32x2_t) __b);
25073 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25074 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25076 return (__a & __b) ? -1ll : 0ll;
25079 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25080 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25082 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25085 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25086 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25088 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25091 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25092 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25094 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25097 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25098 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25100 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25103 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25104 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25106 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25107 (int8x16_t) __b);
25110 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25111 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25113 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25114 (int16x8_t) __b);
25117 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25118 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25120 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25121 (int32x4_t) __b);
25124 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25125 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25127 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25128 (int64x2_t) __b);
25131 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25132 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25134 return (__a & __b) ? -1ll : 0ll;
25137 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25138 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25140 return (__a & __b) ? -1ll : 0ll;
25143 /* vuqadd */
25145 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25146 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25148 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25151 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25152 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25154 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25157 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25158 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25160 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25163 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25164 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25166 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25169 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25170 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25172 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25175 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25176 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25178 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25181 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25182 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25184 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25187 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25188 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25190 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25193 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
25194 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25196 return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25199 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
25200 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25202 return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25205 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
25206 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25208 return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25211 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25212 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25214 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25217 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
25218 __extension__ static __inline rettype \
25219 __attribute__ ((__always_inline__)) \
25220 v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
25222 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
25223 v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
25226 #define __INTERLEAVE_LIST(op) \
25227 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
25228 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
25229 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
25230 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
25231 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
25232 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
25233 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
25234 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
25235 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
25236 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
25237 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
25238 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
25239 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
25240 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
25241 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
25242 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
25243 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
25244 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25246 /* vuzp */
25248 __INTERLEAVE_LIST (uzp)
25250 /* vzip */
25252 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25253 vzip1_f32 (float32x2_t __a, float32x2_t __b)
25255 #ifdef __AARCH64EB__
25256 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25257 #else
25258 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25259 #endif
25262 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25263 vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
25265 #ifdef __AARCH64EB__
25266 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25267 #else
25268 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25269 #endif
25272 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25273 vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
25275 #ifdef __AARCH64EB__
25276 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25277 #else
25278 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25279 #endif
25282 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25283 vzip1_s8 (int8x8_t __a, int8x8_t __b)
25285 #ifdef __AARCH64EB__
25286 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25287 #else
25288 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25289 #endif
25292 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25293 vzip1_s16 (int16x4_t __a, int16x4_t __b)
25295 #ifdef __AARCH64EB__
25296 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25297 #else
25298 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25299 #endif
25302 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25303 vzip1_s32 (int32x2_t __a, int32x2_t __b)
25305 #ifdef __AARCH64EB__
25306 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25307 #else
25308 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25309 #endif
25312 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25313 vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
25315 #ifdef __AARCH64EB__
25316 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25317 #else
25318 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25319 #endif
25322 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25323 vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
25325 #ifdef __AARCH64EB__
25326 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25327 #else
25328 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25329 #endif
25332 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25333 vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
25335 #ifdef __AARCH64EB__
25336 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25337 #else
25338 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25339 #endif
25342 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25343 vzip1q_f32 (float32x4_t __a, float32x4_t __b)
25345 #ifdef __AARCH64EB__
25346 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25347 #else
25348 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25349 #endif
25352 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25353 vzip1q_f64 (float64x2_t __a, float64x2_t __b)
25355 #ifdef __AARCH64EB__
25356 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25357 #else
25358 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25359 #endif
25362 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25363 vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
25365 #ifdef __AARCH64EB__
25366 return __builtin_shuffle (__a, __b, (uint8x16_t)
25367 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25368 #else
25369 return __builtin_shuffle (__a, __b, (uint8x16_t)
25370 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25371 #endif
25374 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25375 vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
25377 #ifdef __AARCH64EB__
25378 return __builtin_shuffle (__a, __b, (uint16x8_t)
25379 {12, 4, 13, 5, 14, 6, 15, 7});
25380 #else
25381 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25382 #endif
25385 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25386 vzip1q_s8 (int8x16_t __a, int8x16_t __b)
25388 #ifdef __AARCH64EB__
25389 return __builtin_shuffle (__a, __b, (uint8x16_t)
25390 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25391 #else
25392 return __builtin_shuffle (__a, __b, (uint8x16_t)
25393 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25394 #endif
25397 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25398 vzip1q_s16 (int16x8_t __a, int16x8_t __b)
25400 #ifdef __AARCH64EB__
25401 return __builtin_shuffle (__a, __b, (uint16x8_t)
25402 {12, 4, 13, 5, 14, 6, 15, 7});
25403 #else
25404 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25405 #endif
25408 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25409 vzip1q_s32 (int32x4_t __a, int32x4_t __b)
25411 #ifdef __AARCH64EB__
25412 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25413 #else
25414 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25415 #endif
25418 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25419 vzip1q_s64 (int64x2_t __a, int64x2_t __b)
25421 #ifdef __AARCH64EB__
25422 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25423 #else
25424 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25425 #endif
25428 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25429 vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
25431 #ifdef __AARCH64EB__
25432 return __builtin_shuffle (__a, __b, (uint8x16_t)
25433 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25434 #else
25435 return __builtin_shuffle (__a, __b, (uint8x16_t)
25436 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25437 #endif
25440 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25441 vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
25443 #ifdef __AARCH64EB__
25444 return __builtin_shuffle (__a, __b, (uint16x8_t)
25445 {12, 4, 13, 5, 14, 6, 15, 7});
25446 #else
25447 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25448 #endif
25451 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25452 vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
25454 #ifdef __AARCH64EB__
25455 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25456 #else
25457 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25458 #endif
25461 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25462 vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
25464 #ifdef __AARCH64EB__
25465 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25466 #else
25467 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25468 #endif
25471 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25472 vzip2_f32 (float32x2_t __a, float32x2_t __b)
25474 #ifdef __AARCH64EB__
25475 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25476 #else
25477 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25478 #endif
25481 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25482 vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
25484 #ifdef __AARCH64EB__
25485 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25486 #else
25487 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25488 #endif
25491 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25492 vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
25494 #ifdef __AARCH64EB__
25495 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25496 #else
25497 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25498 #endif
25501 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25502 vzip2_s8 (int8x8_t __a, int8x8_t __b)
25504 #ifdef __AARCH64EB__
25505 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25506 #else
25507 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25508 #endif
25511 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25512 vzip2_s16 (int16x4_t __a, int16x4_t __b)
25514 #ifdef __AARCH64EB__
25515 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25516 #else
25517 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25518 #endif
25521 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25522 vzip2_s32 (int32x2_t __a, int32x2_t __b)
25524 #ifdef __AARCH64EB__
25525 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25526 #else
25527 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25528 #endif
25531 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25532 vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
25534 #ifdef __AARCH64EB__
25535 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25536 #else
25537 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25538 #endif
25541 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25542 vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
25544 #ifdef __AARCH64EB__
25545 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25546 #else
25547 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25548 #endif
25551 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25552 vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
25554 #ifdef __AARCH64EB__
25555 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25556 #else
25557 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25558 #endif
25561 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25562 vzip2q_f32 (float32x4_t __a, float32x4_t __b)
25564 #ifdef __AARCH64EB__
25565 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25566 #else
25567 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25568 #endif
25571 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25572 vzip2q_f64 (float64x2_t __a, float64x2_t __b)
25574 #ifdef __AARCH64EB__
25575 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25576 #else
25577 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25578 #endif
25581 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25582 vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
25584 #ifdef __AARCH64EB__
25585 return __builtin_shuffle (__a, __b, (uint8x16_t)
25586 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25587 #else
25588 return __builtin_shuffle (__a, __b, (uint8x16_t)
25589 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25590 #endif
25593 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25594 vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
25596 #ifdef __AARCH64EB__
25597 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25598 #else
25599 return __builtin_shuffle (__a, __b, (uint16x8_t)
25600 {4, 12, 5, 13, 6, 14, 7, 15});
25601 #endif
25604 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25605 vzip2q_s8 (int8x16_t __a, int8x16_t __b)
25607 #ifdef __AARCH64EB__
25608 return __builtin_shuffle (__a, __b, (uint8x16_t)
25609 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25610 #else
25611 return __builtin_shuffle (__a, __b, (uint8x16_t)
25612 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25613 #endif
25616 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25617 vzip2q_s16 (int16x8_t __a, int16x8_t __b)
25619 #ifdef __AARCH64EB__
25620 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25621 #else
25622 return __builtin_shuffle (__a, __b, (uint16x8_t)
25623 {4, 12, 5, 13, 6, 14, 7, 15});
25624 #endif
25627 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25628 vzip2q_s32 (int32x4_t __a, int32x4_t __b)
25630 #ifdef __AARCH64EB__
25631 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25632 #else
25633 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25634 #endif
25637 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25638 vzip2q_s64 (int64x2_t __a, int64x2_t __b)
25640 #ifdef __AARCH64EB__
25641 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25642 #else
25643 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25644 #endif
25647 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25648 vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
25650 #ifdef __AARCH64EB__
25651 return __builtin_shuffle (__a, __b, (uint8x16_t)
25652 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25653 #else
25654 return __builtin_shuffle (__a, __b, (uint8x16_t)
25655 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25656 #endif
25659 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25660 vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
25662 #ifdef __AARCH64EB__
25663 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25664 #else
25665 return __builtin_shuffle (__a, __b, (uint16x8_t)
25666 {4, 12, 5, 13, 6, 14, 7, 15});
25667 #endif
25670 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25671 vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
25673 #ifdef __AARCH64EB__
25674 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25675 #else
25676 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25677 #endif
25680 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25681 vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
25683 #ifdef __AARCH64EB__
25684 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25685 #else
25686 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25687 #endif
/* Instantiate the v ## zip wrappers (vzip_f32 ... vzipq_u32) from the
   vzip1/vzip2 building blocks defined above.  */
__INTERLEAVE_LIST (zip)

/* The interleave generator macros are internal to this header; remove
   them from the user-visible namespace.  */
#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE
/* End of optimal implementations in approved order. */

/* The __aarch64_v{get,dup}_lane* helper macros defined near the top of
   this header are implementation details; #undef them so they do not
   leak into code that includes <arm_neon.h>.  */

#undef __aarch64_vget_lane_any
#undef __aarch64_vget_lane_f32
#undef __aarch64_vget_lane_f64
#undef __aarch64_vget_lane_p8
#undef __aarch64_vget_lane_p16
#undef __aarch64_vget_lane_s8
#undef __aarch64_vget_lane_s16
#undef __aarch64_vget_lane_s32
#undef __aarch64_vget_lane_s64
#undef __aarch64_vget_lane_u8
#undef __aarch64_vget_lane_u16
#undef __aarch64_vget_lane_u32
#undef __aarch64_vget_lane_u64

#undef __aarch64_vgetq_lane_f32
#undef __aarch64_vgetq_lane_f64
#undef __aarch64_vgetq_lane_p8
#undef __aarch64_vgetq_lane_p16
#undef __aarch64_vgetq_lane_s8
#undef __aarch64_vgetq_lane_s16
#undef __aarch64_vgetq_lane_s32
#undef __aarch64_vgetq_lane_s64
#undef __aarch64_vgetq_lane_u8
#undef __aarch64_vgetq_lane_u16
#undef __aarch64_vgetq_lane_u32
#undef __aarch64_vgetq_lane_u64

#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#endif /* _AARCH64_NEON_H_ */